diff --git a/.mailmap b/.mailmap index 70d9a3648c780..bba79204eb390 100644 --- a/.mailmap +++ b/.mailmap @@ -22,6 +22,8 @@ # # Please keep this file sorted. + + @@ -30,3 +32,4 @@ Jon Roelofs Jon Roelofs Jon Roelofs Jonathan Roelofs Jon Roelofs Jonathan Roelofs Martin Storsjö +Saleem Abdulrasool diff --git a/buildbot/dependency.conf b/buildbot/dependency.conf index f299efdbee018..9d6ef957b4bd4 100644 --- a/buildbot/dependency.conf +++ b/buildbot/dependency.conf @@ -4,8 +4,8 @@ ocl_cpu_rt_ver=2021.12.6.0.19 # https://github.com/intel/llvm/releases/download/2021-WW26/win-oclcpuexp-2021.12.6.0.19_rel.zip ocl_cpu_rt_ver_win=2021.12.6.0.19 # Same GPU driver supports Level Zero and OpenCL -# https://github.com/intel/compute-runtime/releases/tag/21.19.19792 -ocl_gpu_rt_ver=21.19.19792 +# https://github.com/intel/compute-runtime/releases/tag/21.23.20043 +ocl_gpu_rt_ver=21.23.20043 # Same GPU driver supports Level Zero and OpenCL # https://downloadmirror.intel.com/30381/a08/igfx_win10_100.9466.zip ocl_gpu_rt_ver_win=27.20.100.9466 @@ -30,7 +30,7 @@ ocloc_ver_win=27.20.100.9168 [DRIVER VERSIONS] cpu_driver_lin=2021.12.6.0.19 cpu_driver_win=2021.12.6.0.19 -gpu_driver_lin=21.19.19792 +gpu_driver_lin=21.23.20043 gpu_driver_win=27.20.100.9466 fpga_driver_lin=2021.12.6.0.19 fpga_driver_win=2021.12.6.0.19 diff --git a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp index 6f7f6611c8d20..f447ee41f6f8f 100644 --- a/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp +++ b/clang-tools-extra/clang-tidy/ClangTidyCheck.cpp @@ -150,11 +150,11 @@ llvm::Optional ClangTidyCheck::OptionsView::getEnumInt( unsigned EditDistance = 3; for (const auto &NameAndEnum : Mapping) { if (IgnoreCase) { - if (Value.equals_lower(NameAndEnum.second)) + if (Value.equals_insensitive(NameAndEnum.second)) return NameAndEnum.first; } else if (Value.equals(NameAndEnum.second)) { return NameAndEnum.first; - } else if (Value.equals_lower(NameAndEnum.second)) { + } else if 
(Value.equals_insensitive(NameAndEnum.second)) { Closest = NameAndEnum.second; EditDistance = 0; continue; diff --git a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp index f7c71cca71594..769494207aef2 100644 --- a/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp +++ b/clang-tools-extra/clang-tidy/altera/KernelNameRestrictionCheck.cpp @@ -70,9 +70,9 @@ void KernelNameRestrictionPPCallbacks::InclusionDirective( bool KernelNameRestrictionPPCallbacks::fileNameIsRestricted( StringRef FileName) { - return FileName.equals_lower("kernel.cl") || - FileName.equals_lower("verilog.cl") || - FileName.equals_lower("vhdl.cl"); + return FileName.equals_insensitive("kernel.cl") || + FileName.equals_insensitive("verilog.cl") || + FileName.equals_insensitive("vhdl.cl"); } void KernelNameRestrictionPPCallbacks::EndOfMainFile() { diff --git a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp index 32af0ee6266d2..e50ebdba3b343 100644 --- a/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp +++ b/clang-tools-extra/clang-tidy/bugprone/ArgumentCommentCheck.cpp @@ -176,8 +176,8 @@ static bool sameName(StringRef InComment, StringRef InDecl, bool StrictMode) { return InComment == InDecl; InComment = InComment.trim('_'); InDecl = InDecl.trim('_'); - // FIXME: compare_lower only works for ASCII. - return InComment.compare_lower(InDecl) == 0; + // FIXME: compare_insensitive only works for ASCII. 
+ return InComment.compare_insensitive(InDecl) == 0; } static bool looksLikeExpectMethod(const CXXMethodDecl *Expect) { diff --git a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp index 6f056e73ad5d6..9ff94a009d0dd 100644 --- a/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp +++ b/clang-tools-extra/clang-tidy/cppcoreguidelines/PreferMemberInitializerCheck.cpp @@ -159,7 +159,7 @@ void PreferMemberInitializerCheck::check( if (S->getBeginLoc().isMacroID()) { StringRef MacroName = Lexer::getImmediateMacroName( S->getBeginLoc(), *Result.SourceManager, getLangOpts()); - if (MacroName.contains_lower("assert")) + if (MacroName.contains_insensitive("assert")) return; } if (isControlStatement(S)) diff --git a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp index 224936887e033..93df4915e9f74 100644 --- a/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp +++ b/clang-tools-extra/clang-tidy/misc/StaticAssertCheck.cpp @@ -104,8 +104,8 @@ void StaticAssertCheck::check(const MatchFinder::MatchResult &Result) { StringRef FalseMacroName = Lexer::getImmediateMacroName(FalseLiteralLoc, SM, Opts); - if (FalseMacroName.compare_lower("false") == 0 || - FalseMacroName.compare_lower("null") == 0) + if (FalseMacroName.compare_insensitive("false") == 0 || + FalseMacroName.compare_insensitive("null") == 0) return; } diff --git a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp index b3945b5a932f5..9d467e54a98d8 100644 --- a/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/InconsistentDeclarationParameterNameCheck.cpp @@ -93,8 +93,8 @@ bool nameMatch(StringRef L, 
StringRef R, bool Strict) { return L.empty() || R.empty() || L == R; // We allow two names if one is a prefix/suffix of the other, ignoring case. // Important special case: this is true if either parameter has no name! - return L.startswith_lower(R) || R.startswith_lower(L) || - L.endswith_lower(R) || R.endswith_lower(L); + return L.startswith_insensitive(R) || R.startswith_insensitive(L) || + L.endswith_insensitive(R) || R.endswith_insensitive(L); } DifferingParamsContainer diff --git a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp index 827711e92e877..6abb5c3b877d6 100644 --- a/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp +++ b/clang-tools-extra/clang-tidy/readability/UppercaseLiteralSuffixCheck.cpp @@ -93,7 +93,7 @@ getNewSuffix(llvm::StringRef OldSuffix, // Else, find matching suffix, case-*insensitive*ly. auto NewSuffix = llvm::find_if( NewSuffixes, [OldSuffix](const std::string &PotentialNewSuffix) { - return OldSuffix.equals_lower(PotentialNewSuffix); + return OldSuffix.equals_insensitive(PotentialNewSuffix); }); // Have a match, return it. 
if (NewSuffix != NewSuffixes.end()) diff --git a/clang-tools-extra/clangd/CodeComplete.cpp b/clang-tools-extra/clangd/CodeComplete.cpp index 19f33a40f889b..b36cfd4ba00cf 100644 --- a/clang-tools-extra/clangd/CodeComplete.cpp +++ b/clang-tools-extra/clangd/CodeComplete.cpp @@ -62,6 +62,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringExtras.h" #include "llvm/ADT/StringRef.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Error.h" @@ -1679,7 +1680,7 @@ class CodeCompleteFlow { C.SemaResult->Kind == CodeCompletionResult::RK_Macro) || (C.IndexResult && C.IndexResult->SymInfo.Kind == index::SymbolKind::Macro)) && - !C.Name.startswith_lower(Filter->pattern())) + !C.Name.startswith_insensitive(Filter->pattern())) return None; return Filter->match(C.Name); } @@ -1910,6 +1911,13 @@ bool isIndexedForCodeCompletion(const NamedDecl &ND, ASTContext &ASTCtx) { if (isExplicitTemplateSpecialization(&ND)) return false; + // Category decls are not useful on their own outside the interface or + // implementation blocks. Moreover, sema already provides completion for + // these, even if it requires preamble deserialization. So by excluding them + // from the index, we reduce the noise in all the other completion scopes. 
+ if (llvm::isa(&ND) || llvm::isa(&ND)) + return false; + if (InTopLevelScope(ND)) return true; diff --git a/clang-tools-extra/clangd/CompileCommands.cpp b/clang-tools-extra/clangd/CompileCommands.cpp index 633d13b8b9f0d..e749720b83a13 100644 --- a/clang-tools-extra/clangd/CompileCommands.cpp +++ b/clang-tools-extra/clangd/CompileCommands.cpp @@ -296,9 +296,9 @@ enum DriverMode : unsigned char { DriverMode getDriverMode(const std::vector &Args) { DriverMode Mode = DM_GCC; llvm::StringRef Argv0 = Args.front(); - if (Argv0.endswith_lower(".exe")) + if (Argv0.endswith_insensitive(".exe")) Argv0 = Argv0.drop_back(strlen(".exe")); - if (Argv0.endswith_lower("cl")) + if (Argv0.endswith_insensitive("cl")) Mode = DM_CL; for (const llvm::StringRef Arg : Args) { if (Arg == "--driver-mode=cl") { diff --git a/clang-tools-extra/clangd/ConfigYAML.cpp b/clang-tools-extra/clangd/ConfigYAML.cpp index f5739de0092d9..8a493346d2ce9 100644 --- a/clang-tools-extra/clangd/ConfigYAML.cpp +++ b/clang-tools-extra/clangd/ConfigYAML.cpp @@ -169,7 +169,7 @@ class Parser { void parse(Fragment::IndexBlock::ExternalBlock &F, Located ExternalVal) { - if (!llvm::StringRef(*ExternalVal).equals_lower("none")) { + if (!llvm::StringRef(*ExternalVal).equals_insensitive("none")) { error("Only scalar value supported for External is 'None'", ExternalVal.Range); return; diff --git a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp index cd493a72b2427..e3e2ab3ea8694 100644 --- a/clang-tools-extra/clangd/HeaderSourceSwitch.cpp +++ b/clang-tools-extra/clangd/HeaderSourceSwitch.cpp @@ -27,13 +27,13 @@ llvm::Optional getCorrespondingHeaderOrSource( // Lookup in a list of known extensions. 
auto SourceIter = llvm::find_if(SourceExtensions, [&PathExt](PathRef SourceExt) { - return SourceExt.equals_lower(PathExt); + return SourceExt.equals_insensitive(PathExt); }); bool IsSource = SourceIter != std::end(SourceExtensions); auto HeaderIter = llvm::find_if(HeaderExtensions, [&PathExt](PathRef HeaderExt) { - return HeaderExt.equals_lower(PathExt); + return HeaderExt.equals_insensitive(PathExt); }); bool IsHeader = HeaderIter != std::end(HeaderExtensions); diff --git a/clang-tools-extra/clangd/InlayHints.cpp b/clang-tools-extra/clangd/InlayHints.cpp index 1002aee218477..c1a8357201e9c 100644 --- a/clang-tools-extra/clangd/InlayHints.cpp +++ b/clang-tools-extra/clangd/InlayHints.cpp @@ -156,7 +156,7 @@ class InlayHintVisitor : public RecursiveASTVisitor { return false; StringRef Name = getSimpleName(*Callee); - if (!Name.startswith_lower("set")) + if (!Name.startswith_insensitive("set")) return false; // In addition to checking that the function has one parameter and its @@ -168,10 +168,10 @@ class InlayHintVisitor : public RecursiveASTVisitor { // This currently doesn't handle cases where params use snake_case // and functions don't, e.g. // void setExceptionHandler(EHFunc exception_handler); - // We could improve this by replacing `equals_lower` with some + // We could improve this by replacing `equals_insensitive` with some // `sloppy_equals` which ignores case and also skips underscores. 
StringRef WhatItIsSetting = Name.substr(3).ltrim("_"); - return WhatItIsSetting.equals_lower(ParamNames[0]); + return WhatItIsSetting.equals_insensitive(ParamNames[0]); } bool shouldHint(const Expr *Arg, StringRef ParamName) { diff --git a/clang-tools-extra/clangd/Quality.cpp b/clang-tools-extra/clangd/Quality.cpp index 99421009c71ca..dc9c75b576708 100644 --- a/clang-tools-extra/clangd/Quality.cpp +++ b/clang-tools-extra/clangd/Quality.cpp @@ -378,7 +378,7 @@ static llvm::Optional wordMatching(llvm::StringRef Name, const llvm::StringSet<> *ContextWords) { if (ContextWords) for (const auto &Word : ContextWords->keys()) - if (Name.contains_lower(Word)) + if (Name.contains_insensitive(Word)) return Word; return llvm::None; } @@ -552,7 +552,7 @@ evaluateDecisionForest(const SymbolQualitySignals &Quality, int NumMatch = 0; if (Relevance.ContextWords) { for (const auto &Word : Relevance.ContextWords->keys()) { - if (Relevance.Name.contains_lower(Word)) { + if (Relevance.Name.contains_insensitive(Word)) { ++NumMatch; } } diff --git a/clang-tools-extra/clangd/index/SymbolCollector.cpp b/clang-tools-extra/clangd/index/SymbolCollector.cpp index 84d0ca9bbbd16..9211dd3eb48a5 100644 --- a/clang-tools-extra/clangd/index/SymbolCollector.cpp +++ b/clang-tools-extra/clangd/index/SymbolCollector.cpp @@ -285,7 +285,8 @@ class SymbolCollector::HeaderFileURICache { Line = Line.ltrim(); if (!Line.startswith("error")) return false; - return Line.contains_lower("includ"); // Matches "include" or "including". + return Line.contains_insensitive( + "includ"); // Matches "include" or "including". } // Heuristically headers that only want to be included via an umbrella. 
diff --git a/clang-tools-extra/clangd/index/remote/server/Server.cpp b/clang-tools-extra/clangd/index/remote/server/Server.cpp index 7073cc0dc5670..04ad0b2a1936f 100644 --- a/clang-tools-extra/clangd/index/remote/server/Server.cpp +++ b/clang-tools-extra/clangd/index/remote/server/Server.cpp @@ -37,12 +37,18 @@ #include #include #include +#include #include +#include #if ENABLE_GRPC_REFLECTION #include #endif +#ifdef __GLIBC__ +#include +#endif + namespace clang { namespace clangd { namespace remote { @@ -74,6 +80,12 @@ llvm::cl::opt LogPublic{ llvm::cl::init(false), }; +llvm::cl::opt LogPrefix{ + "log-prefix", + llvm::cl::desc("A string that'll be prepended to all log statements. " + "Useful when running multiple instances on same host."), +}; + llvm::cl::opt TraceFile( "trace-file", llvm::cl::desc("Path to the file where tracer logs will be stored")); @@ -354,12 +366,25 @@ class Monitor final : public v1::Monitor::Service { std::atomic> IndexBuildTime; }; +void maybeTrimMemory() { +#if defined(__GLIBC__) && CLANGD_MALLOC_TRIM + malloc_trim(0); +#endif +} + // Detect changes in \p IndexPath file and load new versions of the index // whenever they become available. void hotReload(clangd::SwapIndex &Index, llvm::StringRef IndexPath, llvm::vfs::Status &LastStatus, llvm::IntrusiveRefCntPtr &FS, Monitor &Monitor) { + // glibc malloc doesn't shrink an arena if there are items living at the end, + // which might happen since we destroy the old index after building new one. + // Trim more aggressively to keep memory usage of the server low. + // Note that we do it deliberately here rather than after Index.reset(), + // because old index might still be kept alive after the reset call if we are + // serving requests. + maybeTrimMemory(); auto Status = FS->status(IndexPath); // Requested file is same as loaded index: no reload is needed. 
if (!Status || (Status->getLastModificationTime() == @@ -410,27 +435,48 @@ void runServerAndWait(clangd::SymbolIndex &Index, llvm::StringRef ServerAddress, ServerShutdownWatcher.join(); } -std::unique_ptr makeLogger(llvm::raw_ostream &OS) { - if (!LogPublic) - return std::make_unique(OS, LogLevel); - // Redacted mode: - // - messages outside the scope of a request: log fully - // - messages tagged [public]: log fully - // - errors: log the format string - // - others: drop - class RedactedLogger : public StreamLogger { +std::unique_ptr makeLogger(llvm::StringRef LogPrefix, + llvm::raw_ostream &OS) { + std::unique_ptr Base; + if (LogPublic) { + // Redacted mode: + // - messages outside the scope of a request: log fully + // - messages tagged [public]: log fully + // - errors: log the format string + // - others: drop + class RedactedLogger : public StreamLogger { + public: + using StreamLogger::StreamLogger; + void log(Level L, const char *Fmt, + const llvm::formatv_object_base &Message) override { + if (Context::current().get(CurrentRequest) == nullptr || + llvm::StringRef(Fmt).startswith("[public]")) + return StreamLogger::log(L, Fmt, Message); + if (L >= Error) + return StreamLogger::log(L, Fmt, + llvm::formatv("[redacted] {0}", Fmt)); + } + }; + Base = std::make_unique(OS, LogLevel); + } else { + Base = std::make_unique(OS, LogLevel); + } + + if (LogPrefix.empty()) + return Base; + class PrefixedLogger : public Logger { + std::string LogPrefix; + std::unique_ptr Base; + public: - using StreamLogger::StreamLogger; + PrefixedLogger(llvm::StringRef LogPrefix, std::unique_ptr Base) + : LogPrefix(LogPrefix.str()), Base(std::move(Base)) {} void log(Level L, const char *Fmt, const llvm::formatv_object_base &Message) override { - if (Context::current().get(CurrentRequest) == nullptr || - llvm::StringRef(Fmt).startswith("[public]")) - return StreamLogger::log(L, Fmt, Message); - if (L >= Error) - return StreamLogger::log(L, Fmt, llvm::formatv("[redacted] {0}", Fmt)); + 
Base->log(L, Fmt, llvm::formatv("[{0}] {1}", LogPrefix, Message)); } }; - return std::make_unique(OS, LogLevel); + return std::make_unique(LogPrefix, std::move(Base)); } } // namespace @@ -454,7 +500,7 @@ int main(int argc, char *argv[]) { llvm::errs().SetBuffered(); // Don't flush stdout when logging for thread safety. llvm::errs().tie(nullptr); - auto Logger = makeLogger(llvm::errs()); + auto Logger = makeLogger(LogPrefix.getValue(), llvm::errs()); clang::clangd::LoggingSession LoggingSession(*Logger); llvm::Optional TracerStream; diff --git a/clang-tools-extra/clangd/support/Path.cpp b/clang-tools-extra/clangd/support/Path.cpp index 44302c808502b..1dd107cc6abef 100644 --- a/clang-tools-extra/clangd/support/Path.cpp +++ b/clang-tools-extra/clangd/support/Path.cpp @@ -13,7 +13,7 @@ namespace clangd { #ifdef CLANGD_PATH_CASE_INSENSITIVE std::string maybeCaseFoldPath(PathRef Path) { return Path.lower(); } -bool pathEqual(PathRef A, PathRef B) { return A.equals_lower(B); } +bool pathEqual(PathRef A, PathRef B) { return A.equals_insensitive(B); } #else // NOT CLANGD_PATH_CASE_INSENSITIVE std::string maybeCaseFoldPath(PathRef Path) { return Path.str(); } bool pathEqual(PathRef A, PathRef B) { return A == B; } diff --git a/clang-tools-extra/clangd/test/remote-index/log-prefix.test b/clang-tools-extra/clangd/test/remote-index/log-prefix.test new file mode 100644 index 0000000000000..fb00576bbd5c4 --- /dev/null +++ b/clang-tools-extra/clangd/test/remote-index/log-prefix.test @@ -0,0 +1,18 @@ +# RUN: rm -rf %t +# RUN: clangd-indexer %S/Inputs/Source.cpp > %t.idx +# RUN: %python %S/pipeline_helper.py --input-file-name=%s --server-arg=--log=verbose --server-arg=-log-prefix=test-prefix --server-log=%t.log --project-root=%S --index-file=%t.idx > /dev/null +# RUN: FileCheck %s < %t.log +# REQUIRES: clangd-remote-index + +# CHECK: [test-prefix] Server listening on 
+{"jsonrpc":"2.0","id":0,"method":"initialize","params":{"processId":123,"rootPath":"clangd","capabilities":{},"trace":"off"}} +--- +{"jsonrpc":"2.0","id":1,"method":"workspace/symbol","params":{"query":"gFoo"}} +# CHECK: [test-prefix] <<< FuzzyFindRequest +# CHECK: [test-prefix] >>> FuzzyFindReply +# CHECK: [test-prefix] [public] request v1/FuzzyFind +--- +{"jsonrpc":"2.0","id":4,"method":"shutdown"} +--- +{"jsonrpc":"2.0","method":"exit"} + diff --git a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp index 78f6e7c4aa9fe..cfa2def7a615d 100644 --- a/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp +++ b/clang-tools-extra/clangd/unittests/CodeCompleteTests.cpp @@ -3221,6 +3221,45 @@ TEST(CompletionTest, NoCrashDueToMacroOrdering) { UnorderedElementsAre(Labeled("ECHO(X)"), Labeled("ECHO2(X)"))); } +TEST(CompletionTest, ObjCCategoryDecls) { + TestTU TU; + TU.ExtraArgs.push_back("-xobjective-c"); + TU.HeaderCode = R"objc( + @interface Foo + @end + + @interface Foo (FooExt1) + @end + + @interface Foo (FooExt2) + @end + + @interface Bar + @end + + @interface Bar (BarExt) + @end)objc"; + + { + Annotations Test(R"objc( + @implementation Foo (^) + @end + )objc"); + TU.Code = Test.code().str(); + auto Results = completions(TU, Test.point()); + EXPECT_THAT(Results.Completions, + UnorderedElementsAre(Labeled("FooExt1"), Labeled("FooExt2"))); + } + { + Annotations Test(R"objc( + @interface Foo (^) + @end + )objc"); + TU.Code = Test.code().str(); + auto Results = completions(TU, Test.point()); + EXPECT_THAT(Results.Completions, UnorderedElementsAre(Labeled("BarExt"))); + } +} } // namespace } // namespace clangd } // namespace clang diff --git a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp index 87df23baf48a7..e24c3c1d60b47 100644 --- a/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp +++ 
b/clang-tools-extra/clangd/unittests/SymbolCollectorTests.cpp @@ -552,8 +552,9 @@ TEST_F(SymbolCollectorTest, ObjCSymbols) { EXPECT_THAT(Symbols, UnorderedElementsAre( QName("Person"), QName("Person::someMethodName:lastName:"), - QName("MyCategory"), QName("Person::someMethodName2:"), - QName("MyProtocol"), QName("MyProtocol::someMethodName3:"))); + AllOf(QName("MyCategory"), ForCodeCompletion(false)), + QName("Person::someMethodName2:"), QName("MyProtocol"), + QName("MyProtocol::someMethodName3:"))); } TEST_F(SymbolCollectorTest, ObjCPropertyImpl) { diff --git a/clang-tools-extra/modularize/ModularizeUtilities.cpp b/clang-tools-extra/modularize/ModularizeUtilities.cpp index 200370c135dfa..3bf761c0a22c3 100644 --- a/clang-tools-extra/modularize/ModularizeUtilities.cpp +++ b/clang-tools-extra/modularize/ModularizeUtilities.cpp @@ -470,9 +470,9 @@ bool ModularizeUtilities::isHeader(StringRef FileName) { StringRef Extension = llvm::sys::path::extension(FileName); if (Extension.size() == 0) return true; - if (Extension.equals_lower(".h")) + if (Extension.equals_insensitive(".h")) return true; - if (Extension.equals_lower(".inc")) + if (Extension.equals_insensitive(".inc")) return true; return false; } diff --git a/clang/cmake/caches/Fuchsia-stage2.cmake b/clang/cmake/caches/Fuchsia-stage2.cmake index 85d96bc52d529..eb001ef6579ce 100644 --- a/clang/cmake/caches/Fuchsia-stage2.cmake +++ b/clang/cmake/caches/Fuchsia-stage2.cmake @@ -291,6 +291,7 @@ set(LLVM_TOOLCHAIN_TOOLS llvm-symbolizer llvm-xray sancov + scan-build-py CACHE STRING "") set(LLVM_DISTRIBUTION_COMPONENTS diff --git a/clang/docs/ClangFormat.rst b/clang/docs/ClangFormat.rst index d5333c0032b47..4a1422e85b06c 100644 --- a/clang/docs/ClangFormat.rst +++ b/clang/docs/ClangFormat.rst @@ -11,12 +11,12 @@ Standalone Tool =============== :program:`clang-format` is located in `clang/tools/clang-format` and can be used -to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. 
+to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. .. code-block:: console $ clang-format -help - OVERVIEW: A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code. + OVERVIEW: A tool to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# code. If no arguments are specified, it formats the code from standard input and writes the result to the standard output. diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index 0d0c07fa350fd..96d89db7a5ccf 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -2377,6 +2377,28 @@ the configuration (without a prefix: ``Auto``). For example: BOOST_FOREACH. +**IfMacros** (``std::vector``) + A vector of macros that should be interpreted as conditionals + instead of as function calls. + + These are expected to be macros of the form: + + .. code-block:: c++ + + IF(...) + + else IF(...) + + + In the .clang-format configuration file, this can be configured like: + + .. code-block:: yaml + + IfMacros: ['IF'] + + For example: `KJ_IF_MAYBE + `_ + **IncludeBlocks** (``IncludeBlocksStyle``) Dependent on the value, multiple ``#include`` blocks can be sorted as one and divided based on category. @@ -2841,6 +2863,42 @@ the configuration (without a prefix: ``Auto``). bar(); } } +**LambdaBodyIndentation** (``LambdaBodyIndentationKind``) + The indentation style of lambda bodies. ``Signature`` (the default) + causes the lambda body to be indented one additional level relative to + the indentation level of the signature. ``OuterScope`` forces the lambda + body to be indented one additional level relative to the parent scope + containing the lambda signature. For callback-heavy code, it may improve + readability to have the signature indented two levels and to use + ``OuterScope``. The KJ style guide requires ``OuterScope``. 
+ `KJ style guide + `_ + + Possible values: + + * ``LBI_Signature`` (in configuration: ``Signature``) + Align lambda body relative to the lambda signature. This is the default. + + .. code-block:: c++ + + someMethod( + [](SomeReallyLongLambdaSignatureArgument foo) { + return; + }); + + * ``LBI_OuterScope`` (in configuration: ``OuterScope``) + Align lambda body relative to the indentation level of the outer scope + the lambda signature resides in. + + .. code-block:: c++ + + someMethod( + [](SomeReallyLongLambdaSignatureArgument foo) { + return; + }); + + + **Language** (``LanguageKind``) Language, this format style is targeted at. @@ -2861,6 +2919,9 @@ the configuration (without a prefix: ``Auto``). * ``LK_JavaScript`` (in configuration: ``JavaScript``) Should be used for JavaScript. + * ``LK_Json`` (in configuration: ``Json``) + Should be used for JSON. + * ``LK_ObjC`` (in configuration: ``ObjC``) Should be used for Objective-C, Objective-C++. @@ -3178,6 +3239,38 @@ the configuration (without a prefix: ``Auto``). BasedOnStyle: llvm CanonicalDelimiter: 'cc' +**ReferenceAlignment** (``ReferenceAlignmentStyle``) + Reference alignment style (overrides ``PointerAlignment`` for + references). + + Possible values: + + * ``RAS_Pointer`` (in configuration: ``Pointer``) + Align reference like ``PointerAlignment``. + + * ``RAS_Left`` (in configuration: ``Left``) + Align reference to the left. + + .. code-block:: c++ + + int& a; + + * ``RAS_Right`` (in configuration: ``Right``) + Align reference to the right. + + .. code-block:: c++ + + int &a; + + * ``RAS_Middle`` (in configuration: ``Middle``) + Align reference in the middle. + + .. code-block:: c++ + + int & a; + + + **ReflowComments** (``bool``) If ``true``, clang-format will attempt to re-flow comments. @@ -3444,10 +3537,12 @@ the configuration (without a prefix: ``Auto``). 
} } - * ``SBPO_ControlStatementsExceptForEachMacros`` (in configuration: ``ControlStatementsExceptForEachMacros``) + * ``SBPO_ControlStatementsExceptControlMacros`` (in configuration: ``ControlStatementsExceptControlMacros``) Same as ``SBPO_ControlStatements`` except this option doesn't apply to - ForEach macros. This is useful in projects where ForEach macros are - treated as function calls instead of control statements. + ForEach and If macros. This is useful in projects where ForEach/If + macros are treated as function calls instead of control statements. + ``SBPO_ControlStatementsExceptForEachMacros`` remains an alias for + backward compatibility. .. code-block:: c++ diff --git a/clang/docs/DataFlowSanitizerDesign.rst b/clang/docs/DataFlowSanitizerDesign.rst index fc1ca9840f6e9..7615a2acc58b8 100644 --- a/clang/docs/DataFlowSanitizerDesign.rst +++ b/clang/docs/DataFlowSanitizerDesign.rst @@ -76,25 +76,41 @@ The following is the memory layout for Linux/x86\_64: +---------------+---------------+--------------------+ | Start | End | Use | +===============+===============+====================+ -| 0x700000008000|0x800000000000 | application memory | +| 0x700000000000|0x800000000000 | application 3 | +---------------+---------------+--------------------+ -| 0x300000000000|0x700000008000 | unused | +| 0x610000000000|0x700000000000 | unused | +---------------+---------------+--------------------+ -| 0x200000008000|0x300000000000 | origin | +| 0x600000000000|0x610000000000 | origin 1 | +---------------+---------------+--------------------+ -| 0x200000000000|0x200000008000 | unused | +| 0x510000000000|0x600000000000 | application 2 | +---------------+---------------+--------------------+ -| 0x100000008000|0x200000000000 | shadow memory | +| 0x500000000000|0x510000000000 | shadow 1 | +---------------+---------------+--------------------+ -| 0x000000010000|0x100000008000 | unused | +| 0x400000000000|0x500000000000 | unused | 
+---------------+---------------+--------------------+ -| 0x000000000000|0x000000010000 | reserved by kernel | +| 0x300000000000|0x400000000000 | origin 3 | ++---------------+---------------+--------------------+ +| 0x200000000000|0x300000000000 | shadow 3 | ++---------------+---------------+--------------------+ +| 0x110000000000|0x200000000000 | origin 2 | ++---------------+---------------+--------------------+ +| 0x100000000000|0x110000000000 | unused | ++---------------+---------------+--------------------+ +| 0x010000000000|0x100000000000 | shadow 2 | ++---------------+---------------+--------------------+ +| 0x000000000000|0x010000000000 | application 1 | +---------------+---------------+--------------------+ Each byte of application memory corresponds to a single byte of shadow -memory, which is used to store its taint label. As for LLVM SSA -registers, we have not found it necessary to associate a label with -each byte or bit of data, as some other tools do. Instead, labels are +memory, which is used to store its taint label. We map memory, shadow, and +origin regions to each other with these masks and offsets: + +* shadow_addr = memory_addr ^ 0x500000000000 + +* origin_addr = shadow_addr + 0x100000000000 + +As for LLVM SSA registers, we have not found it necessary to associate a label +with each byte or bit of data, as some other tools do. Instead, labels are associated directly with registers. Loads will result in a union of all shadow labels corresponding to bytes loaded, and stores will result in a copy of the label of the stored value to the shadow of all diff --git a/clang/docs/LanguageExtensions.rst b/clang/docs/LanguageExtensions.rst index 36aaecd17999f..2efbafae62fda 100644 --- a/clang/docs/LanguageExtensions.rst +++ b/clang/docs/LanguageExtensions.rst @@ -523,6 +523,64 @@ float matrices and add the result to a third 4x4 matrix. return a + b * c; } +The matrix type extension also supports operations on a matrix and a scalar. + +.. 
code-block:: c++ + + typedef float m4x4_t __attribute__((matrix_type(4, 4))); + + m4x4_t f(m4x4_t a) { + return (a + 23) * 12; + } + +The matrix type extension supports division on a matrix and a scalar but not on a matrix and a matrix. + +.. code-block:: c++ + + typedef float m4x4_t __attribute__((matrix_type(4, 4))); + + m4x4_t f(m4x4_t a) { + a = a / 3.0; + return a; + } + +The matrix type extension supports compound assignments for addition, subtraction, and multiplication on matrices +and on a matrix and a scalar, provided their types are consistent. + +.. code-block:: c++ + + typedef float m4x4_t __attribute__((matrix_type(4, 4))); + + m4x4_t f(m4x4_t a, m4x4_t b) { + a += b; + a -= b; + a *= b; + a += 23; + a -= 12; + return a; + } + +The matrix type extension supports explicit casts. Implicit type conversion between matrix types is not allowed. + +.. code-block:: c++ + + typedef int ix5x5 __attribute__((matrix_type(5, 5))); + typedef float fx5x5 __attribute__((matrix_type(5, 5))); + + fx5x5 f1(ix5x5 i, fx5x5 f) { + return (fx5x5) i; + } + + + template <typename X> + using matrix_5_5 = X __attribute__((matrix_type(5, 5))); + + void f2() { + matrix_5_5<double> d; + matrix_5_5<int> i; + i = (matrix_5_5<int>)d; + i = static_cast<matrix_5_5<int>>(d); + } Half-Precision Floating Point ============================= diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index bddaea6e48461..f26c484c5bace 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -259,6 +259,16 @@ clang-format - Option ``BreakInheritanceList`` gets a new style, ``AfterComma``. It breaks only after the commas that separate the base-specifiers. +- Option ``LambdaBodyIndentation`` has been added to control how the body of a + lambda is indented. The default ``Signature`` value indents the body one level + relative to whatever indentation the signature has. 
``OuterScope`` lets you + change that so that the lambda body is indented one level relative to the scope + containing the lambda, regardless of where the lambda signature was placed. + +- Option ``IfMacros`` has been added. This lets you define macros that get + formatted like conditionals much like ``ForEachMacros`` get styled like + foreach loops. + - ``git-clang-format`` no longer formats changes to symbolic links. (Fixes https://llvm.org/PR46992.) @@ -268,6 +278,8 @@ clang-format - Option ``AlignArrayOfStructure`` has been added to allow for ordering array-like initializers. +- Support for formatting JSON files (\*.json) has been added to clang-format. + libclang -------- diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 68f2d84442beb..8cb1ab43cc196 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -2586,9 +2586,9 @@ def NoInstrumentFunction : InheritableAttr { } def NoProfileFunction : InheritableAttr { - let Spellings = [Clang<"no_profile">]; + let Spellings = [GCC<"no_profile_instrument_function">]; let Subjects = SubjectList<[Function]>; - let Documentation = [NoProfileDocs]; + let Documentation = [NoProfileInstrumentFunctionDocs]; let SimpleHandler = 1; } diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 956ee6f96e4ce..159d217f36043 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -3742,12 +3742,12 @@ This attribute accepts a single parameter that must be one of the following: }]; } -def NoProfileDocs : Documentation { +def NoProfileInstrumentFunctionDocs : Documentation { let Category = DocCatFunction; let Content = [{ -Use the ``no_profile`` attribute on a function declaration to denote that the -compiler should not instrument the function with profile-related -instrumentation, such as via the +Use the ``no_profile_instrument_function`` attribute on a function declaration +to denote
that the compiler should not instrument the function with +profile-related instrumentation, such as via the ``-fprofile-generate`` / ``-fprofile-instr-generate`` / ``-fcs-profile-generate`` / ``-fprofile-arcs`` flags. }]; diff --git a/clang/include/clang/Basic/BuiltinsPPC.def b/clang/include/clang/Basic/BuiltinsPPC.def index 9d7f765a2133c..e07632d415109 100644 --- a/clang/include/clang/Basic/BuiltinsPPC.def +++ b/clang/include/clang/Basic/BuiltinsPPC.def @@ -45,6 +45,16 @@ BUILTIN(__builtin_ppc_dcbt, "vv*", "") BUILTIN(__builtin_ppc_dcbtst, "vv*", "") BUILTIN(__builtin_ppc_dcbz, "vv*", "") BUILTIN(__builtin_ppc_icbt, "vv*", "") +BUILTIN(__builtin_ppc_compare_and_swap, "iiD*i*i", "") +BUILTIN(__builtin_ppc_compare_and_swaplp, "iLiD*Li*Li", "") +BUILTIN(__builtin_ppc_fetch_and_add, "UiUiD*Ui", "") +BUILTIN(__builtin_ppc_fetch_and_addlp, "ULiULiD*ULi", "") +BUILTIN(__builtin_ppc_fetch_and_and, "UiUiD*Ui", "") +BUILTIN(__builtin_ppc_fetch_and_andlp, "ULiULiD*ULi", "") +BUILTIN(__builtin_ppc_fetch_and_or, "UiUiD*Ui", "") +BUILTIN(__builtin_ppc_fetch_and_orlp, "ULiULiD*ULi", "") +BUILTIN(__builtin_ppc_fetch_and_swap, "UiUiD*Ui", "") +BUILTIN(__builtin_ppc_fetch_and_swaplp, "ULiULiD*ULi", "") BUILTIN(__builtin_ppc_get_timebase, "ULLi", "n") diff --git a/clang/include/clang/Basic/Cuda.h b/clang/include/clang/Basic/Cuda.h index 13317f62c3c5a..aa12724cbf0c6 100644 --- a/clang/include/clang/Basic/Cuda.h +++ b/clang/include/clang/Basic/Cuda.h @@ -89,6 +89,7 @@ enum class CudaArch { GFX1032, GFX1033, GFX1034, + GFX1035, LAST, }; diff --git a/clang/include/clang/Basic/Diagnostic.h b/clang/include/clang/Basic/Diagnostic.h index f389a39e84a54..0cc2a1df15584 100644 --- a/clang/include/clang/Basic/Diagnostic.h +++ b/clang/include/clang/Basic/Diagnostic.h @@ -16,9 +16,9 @@ #include "clang/Basic/DiagnosticIDs.h" #include "clang/Basic/DiagnosticOptions.h" +#include "clang/Basic/OptReportHandler.h" #include "clang/Basic/SourceLocation.h" #include "clang/Basic/Specifiers.h" -#include 
"clang/Basic/SyclOptReportHandler.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/IntrusiveRefCntPtr.h" @@ -295,7 +295,7 @@ class DiagnosticsEngine : public RefCountedBase { DiagnosticConsumer *Client = nullptr; std::unique_ptr Owner; SourceManager *SourceMgr = nullptr; - SyclOptReportHandler OptReportHandler; + SyclOptReportHandler SyclOptReport; /// Mapping information for diagnostics. /// @@ -549,11 +549,9 @@ class DiagnosticsEngine : public RefCountedBase { LLVM_DUMP_METHOD void dump() const; LLVM_DUMP_METHOD void dump(StringRef DiagName) const; - /// Retrieve the report SyclOptReport info. - SyclOptReportHandler &getSYCLOptReportHandler() { return OptReportHandler; } - const SyclOptReportHandler &getSYCLOptReportHandler() const { - return OptReportHandler; - } + /// Retrieve the SyclOptReport info. + SyclOptReportHandler &getSYCLOptReport() { return SyclOptReport; } + const SyclOptReportHandler &getSYCLOptReport() const { return SyclOptReport; } const IntrusiveRefCntPtr &getDiagnosticIDs() const { return Diags; diff --git a/clang/include/clang/Basic/DiagnosticDriverKinds.td b/clang/include/clang/Basic/DiagnosticDriverKinds.td index 193b5cf8d6569..3349882325e96 100644 --- a/clang/include/clang/Basic/DiagnosticDriverKinds.td +++ b/clang/include/clang/Basic/DiagnosticDriverKinds.td @@ -570,9 +570,6 @@ def err_drv_cannot_mix_options : Error<"cannot specify '%1' along with '%0'">; def err_drv_invalid_object_mode : Error<"OBJECT_MODE setting %0 is not recognized and is not a valid setting.">; -def err_aix_default_altivec_abi : Error< - "The default Altivec ABI on AIX is not yet supported, use '-mabi=vec-extabi' for the extended Altivec ABI">; - def err_aix_unsupported_tls_model : Error<"TLS model '%0' is not yet supported on AIX">; def err_invalid_cxx_abi : Error<"Invalid C++ ABI name '%0'">; diff --git a/clang/include/clang/Basic/DiagnosticFrontendKinds.td b/clang/include/clang/Basic/DiagnosticFrontendKinds.td index 
e68058dd19b5b..0f4ccec385506 100644 --- a/clang/include/clang/Basic/DiagnosticFrontendKinds.td +++ b/clang/include/clang/Basic/DiagnosticFrontendKinds.td @@ -25,7 +25,7 @@ def note_fe_source_mgr : Note<"%0">, CatSourceMgr; def err_fe_cannot_link_module : Error<"cannot link module '%0': %1">, DefaultFatal; -def warn_fe_frame_larger_than : Warning<"stack frame size of %0 bytes in %q1">, +def warn_fe_frame_larger_than : Warning<"stack frame size (%0) exceeds limit (%1) in %q2">, BackendInfo, InGroup; def warn_fe_backend_frame_larger_than: Warning<"%0">, BackendInfo, InGroup; diff --git a/clang/include/clang/Basic/DiagnosticGroups.td b/clang/include/clang/Basic/DiagnosticGroups.td index f44c84a11f7f2..eb738b24ccda4 100644 --- a/clang/include/clang/Basic/DiagnosticGroups.td +++ b/clang/include/clang/Basic/DiagnosticGroups.td @@ -525,6 +525,7 @@ def OpenCLUnsupportedRGBA: DiagGroup<"opencl-unsupported-rgba">; def UnderalignedExceptionObject : DiagGroup<"underaligned-exception-object">; def DeprecatedObjCIsaUsage : DiagGroup<"deprecated-objc-isa-usage">; def ExplicitInitializeCall : DiagGroup<"explicit-initialize-call">; +def OrderedCompareFunctionPointers : DiagGroup<"ordered-compare-function-pointers">; def Packed : DiagGroup<"packed">; def Padded : DiagGroup<"padded">; diff --git a/clang/include/clang/Basic/DiagnosticSemaKinds.td b/clang/include/clang/Basic/DiagnosticSemaKinds.td index beef27842121a..d8eb716abf9a6 100644 --- a/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -6819,7 +6819,7 @@ def warn_param_mismatched_alignment : Warning< def err_objc_object_assignment : Error< "cannot assign to class object (%0 invalid)">; def err_typecheck_invalid_operands : Error< - "invalid operands to binary expression (%0 and %1)">; + "invalid operands to binary expression (%0 and %1)">, Deferrable; def note_typecheck_invalid_operands_converted : Note< "%select{first|second}0 operand was implicitly converted to type %1">; 
def err_typecheck_logical_vector_expr_gnu_cpp_restrict : Error< @@ -6842,9 +6842,14 @@ def ext_typecheck_compare_complete_incomplete_pointers : Extension< "%0 is %select{|in}2complete and " "%1 is %select{|in}3complete">, InGroup; +def warn_typecheck_ordered_comparison_of_function_pointers : Warning< + "ordered comparison of function pointers (%0 and %1)">, + InGroup; def ext_typecheck_ordered_comparison_of_function_pointers : ExtWarn< "ordered comparison of function pointers (%0 and %1)">, - InGroup>; + InGroup; +def err_typecheck_ordered_comparison_of_function_pointers : Error< + "ordered comparison of function pointers (%0 and %1)">; def ext_typecheck_comparison_of_fptr_to_void : Extension< "equality comparison between function pointer and void pointer (%0 and %1)">; def err_typecheck_comparison_of_fptr_to_void : Error< @@ -9193,9 +9198,6 @@ def note_defaulted_comparison_cannot_deduce_undeduced_auto : Note< "%select{|member|base class}0 %1 declared here">; def note_defaulted_comparison_cannot_deduce_callee : Note< "selected 'operator<=>' for %select{|member|base class}0 %1 declared here">; -def note_defaulted_comparison_selected_invalid : Note< - "would compare %select{|member|base class}0 %1 " - "as %2, which does not support relational comparisons">; def err_incorrect_defaulted_comparison_constexpr : Error< "defaulted definition of %select{%sub{select_defaulted_comparison_kind}1|" "three-way comparison operator}0 " @@ -11424,11 +11426,6 @@ def err_sycl_type_trait_requires_nonnegative_index : Error< def err_sycl_builtin_type_trait_evaluated : Error< "'%select{__builtin_field_type|__builtin_base_type}0' cannot be used in an " "evaluated context">; -def err_sycl_non_trivially_copy_ctor_dtor_type - : Error<"kernel parameter has non-trivially %select{copy " - "constructible|destructible}0 class/struct type %1">; -def err_sycl_non_std_layout_type : Error< - "kernel parameter has non-standard layout class/struct type %0">; def err_sycl_non_constant_array_type : 
Error< "kernel parameter is not a constant size array %0">; def err_conflicting_sycl_kernel_attributes : Error< diff --git a/clang/include/clang/Basic/LangOptions.def b/clang/include/clang/Basic/LangOptions.def index 44099a65d990a..3e92d4e76f268 100644 --- a/clang/include/clang/Basic/LangOptions.def +++ b/clang/include/clang/Basic/LangOptions.def @@ -237,7 +237,6 @@ LANGOPT(OpenMPCUDANumSMs , 32, 0, "Number of SMs for CUDA devices.") LANGOPT(OpenMPCUDABlocksPerSM , 32, 0, "Number of blocks per SM for CUDA devices.") LANGOPT(OpenMPCUDAReductionBufNum , 32, 1024, "Number of the reduction records in the intermediate reduction buffer used for the teams reductions.") LANGOPT(OpenMPOptimisticCollapse , 1, 0, "Use at most 32 bits to represent the collapsed loop nest counter.") -LANGOPT(OpenMPCUDATargetParallel, 1, 0, "Support parallel execution of target region on Cuda-based devices.") LANGOPT(RenderScript , 1, 0, "RenderScript") LANGOPT(CUDAIsDevice , 1, 0, "compiling for CUDA device") @@ -278,6 +277,7 @@ BENIGN_LANGOPT(ModulesDebugInfo , 1, 0, "Modules debug info") BENIGN_LANGOPT(ElideConstructors , 1, 1, "C++ copy constructor elision") BENIGN_LANGOPT(DumpRecordLayouts , 1, 0, "dumping the layout of IRgen'd records") BENIGN_LANGOPT(DumpRecordLayoutsSimple , 1, 0, "dumping the layout of IRgen'd records in a simple form") +BENIGN_LANGOPT(DumpRecordLayoutsComplete , 1, 0, "dumping the AST layout of all complete records") BENIGN_LANGOPT(DumpVTableLayouts , 1, 0, "dumping the layouts of emitted vtables") LANGOPT(NoConstantCFStrings , 1, 0, "no constant CoreFoundation strings") BENIGN_LANGOPT(InlineVisibilityHidden , 1, 0, "hidden visibility for inline C++ methods") diff --git a/clang/include/clang/Basic/SyclOptReportHandler.h b/clang/include/clang/Basic/OptReportHandler.h similarity index 91% rename from clang/include/clang/Basic/SyclOptReportHandler.h rename to clang/include/clang/Basic/OptReportHandler.h index 9b15c4ccd294a..47a242f9c4efe 100644 --- 
a/clang/include/clang/Basic/SyclOptReportHandler.h +++ b/clang/include/clang/Basic/OptReportHandler.h @@ -1,4 +1,4 @@ -//===---------------------- SyclOptReportHandler.h --------------*- C++ -*-===// +//===------------------------ OptReportHandler.h ----------------*- C++ -*-===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. @@ -11,8 +11,8 @@ /// //===----------------------------------------------------------------------===// -#ifndef LLVM_CLANG_BASIC_SYCLOPTREPORTHANDLER_H -#define LLVM_CLANG_BASIC_SYCLOPTREPORTHANDLER_H +#ifndef LLVM_CLANG_BASIC_OPTREPORTHANDLER_H +#define LLVM_CLANG_BASIC_OPTREPORTHANDLER_H #include "clang/Basic/SourceLocation.h" #include "llvm/ADT/DenseMap.h" @@ -56,7 +56,6 @@ class SyclOptReportHandler { assert(It != Map.end()); return It->second; } - bool HasOptReportInfo(const FunctionDecl *FD) const { return Map.find(FD) != Map.end(); } @@ -64,4 +63,4 @@ class SyclOptReportHandler { } // namespace clang -#endif // LLVM_CLANG_BASIC_SYCLOPTREPORTHANDLER_H +#endif // LLVM_CLANG_BASIC_OPTREPORTHANDLER_H diff --git a/clang/include/clang/Basic/riscv_vector.td b/clang/include/clang/Basic/riscv_vector.td index 886929919d6e6..7cdec1477962b 100644 --- a/clang/include/clang/Basic/riscv_vector.td +++ b/clang/include/clang/Basic/riscv_vector.td @@ -1609,4 +1609,41 @@ let HasMask = false, HasVL = false, HasNoMaskedOverloaded = false, def vlmul_ext_u # dst_lmul : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUv", "csil">; } } + + let Name = "vget_v", + ManualCodegen = [{ + { + ID = Intrinsic::experimental_vector_extract; + ScalableVectorType *VecTy = cast(ResultType); + Ops[1] = Builder.CreateMul(Ops[1], + ConstantInt::get(Ops[1]->getType(), + VecTy->getMinNumElements())); + IntrinsicTypes = {ResultType, Ops[0]->getType()}; + return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); + } + }] in { + foreach dst_lmul = 
["(SFixedLog2LMUL:0)", "(SFixedLog2LMUL:1)", "(SFixedLog2LMUL:2)"] in { + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "vvKz", "csilfd">; + def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "UvUvKz", "csil">; + } + } + + let Name = "vset_v", Log2LMUL = [0, 1, 2], + ManualCodegen = [{ + { + ID = Intrinsic::experimental_vector_insert; + IntrinsicTypes = {ResultType, Ops[2]->getType()}; + ScalableVectorType *VecTy = cast(Ops[2]->getType()); + Ops[1] = Builder.CreateMul(Ops[1], + ConstantInt::get(Ops[1]->getType(), + VecTy->getMinNumElements())); + std::swap(Ops[1], Ops[2]); + return Builder.CreateCall(CGM.getIntrinsic(ID, IntrinsicTypes), Ops, ""); + } + }] in { + foreach dst_lmul = ["(LFixedLog2LMUL:1)", "(LFixedLog2LMUL:2)", "(LFixedLog2LMUL:3)"] in { + def : RVVBuiltin<"v" # dst_lmul # "v", dst_lmul # "v" # dst_lmul # "vKzv", "csilfd">; + def : RVVBuiltin<"Uv" # dst_lmul # "Uv", dst_lmul # "Uv" # dst_lmul #"UvKzUv", "csil">; + } + } } diff --git a/clang/include/clang/Driver/Options.td b/clang/include/clang/Driver/Options.td index 20674d3cb145e..6ab8bcbe8980c 100644 --- a/clang/include/clang/Driver/Options.td +++ b/clang/include/clang/Driver/Options.td @@ -2385,12 +2385,6 @@ def fopenmp_cuda_teams_reduction_recs_num_EQ : Joined<["-"], "fopenmp-cuda-teams defm openmp_optimistic_collapse : BoolFOption<"openmp-optimistic-collapse", LangOpts<"OpenMPOptimisticCollapse">, DefaultFalse, PosFlag, NegFlag, BothFlags<[NoArgumentUnused, HelpHidden]>>; -def fopenmp_cuda_parallel_target_regions : Flag<["-"], "fopenmp-cuda-parallel-target-regions">, Group, - Flags<[CC1Option, NoArgumentUnused, HelpHidden]>, - HelpText<"Support parallel execution of target regions on Cuda-based devices.">; -def fno_openmp_cuda_parallel_target_regions : Flag<["-"], "fno-openmp-cuda-parallel-target-regions">, Group, - Flags<[NoArgumentUnused, HelpHidden]>, - HelpText<"Support only serial execution of target regions on Cuda-based devices.">; def static_openmp: Flag<["-"], "static-openmp">, 
HelpText<"Use the static host OpenMP runtime while linking.">; def fno_optimize_sibling_calls : Flag<["-"], "fno-optimize-sibling-calls">, Group; @@ -4654,6 +4648,11 @@ def fget_symbols_sources : Flag<["-"], "fget-symbols-sources">, Group, Group, MetaVarName<"">, HelpText<"Use as the suffix for module files (the default value is `.mod`)">; +def fanalyzed_objects_for_unparse : Flag<["-"], + "fanalyzed-objects-for-unparse">, Group; +def fno_analyzed_objects_for_unparse : Flag<["-"], + "fno-analyzed-objects-for-unparse">, Group, + HelpText<"Do not use the analyzed objects when unparsing">; } @@ -5504,10 +5503,13 @@ def stats_file : Joined<["-"], "stats-file=">, def fdump_record_layouts_simple : Flag<["-"], "fdump-record-layouts-simple">, HelpText<"Dump record layout information in a simple form used for testing">, MarshallingInfoFlag>; +def fdump_record_layouts_complete : Flag<["-"], "fdump-record-layouts-complete">, + HelpText<"Dump record layout information for all complete types">, + MarshallingInfoFlag>; def fdump_record_layouts : Flag<["-"], "fdump-record-layouts">, HelpText<"Dump record layout information">, MarshallingInfoFlag>, - ImpliedByAnyOf<[fdump_record_layouts_simple.KeyPath]>; + ImpliedByAnyOf<[fdump_record_layouts_simple.KeyPath, fdump_record_layouts_complete.KeyPath]>; def fix_what_you_can : Flag<["-"], "fix-what-you-can">, HelpText<"Apply fix-it advice even in the presence of unfixable errors">, MarshallingInfoFlag>; @@ -6026,6 +6028,9 @@ def _SLASH_diagnostics_classic : CLFlag<"diagnostics:classic">, def _SLASH_D : CLJoinedOrSeparate<"D">, HelpText<"Define macro">, MetaVarName<"">, Alias; def _SLASH_E : CLFlag<"E">, HelpText<"Preprocess to stdout">, Alias; +def _SLASH_external_COLON_I : CLJoinedOrSeparate<"external:I">, Alias, + HelpText<"Add directory to include search path with warnings suppressed">, + MetaVarName<"">; def _SLASH_fp_except : CLFlag<"fp:except">, HelpText<"">, Alias; def _SLASH_fp_except_ : CLFlag<"fp:except-">, HelpText<"">, 
Alias; @@ -6234,6 +6239,9 @@ def _SLASH_volatile_Group : OptionGroup<"">, def _SLASH_EH : CLJoined<"EH">, HelpText<"Set exception handling model">; def _SLASH_EP : CLFlag<"EP">, HelpText<"Disable linemarker output and preprocess to stdout">; +def _SLASH_external_env : CLJoined<"external:env:">, + HelpText<"Add dirs in env var to include search path with warnings suppressed">, + MetaVarName<"">; def _SLASH_FA : CLFlag<"FA">, HelpText<"Output assembly code file during compilation">; def _SLASH_Fa : CLJoined<"Fa">, @@ -6397,7 +6405,6 @@ def _SLASH_doc : CLJoined<"doc">; def _SLASH_experimental : CLJoined<"experimental:">; def _SLASH_exportHeader : CLFlag<"exportHeader">; def _SLASH_external : CLJoined<"external:">; -def _SLASH_external_COLON_I : CLJoinedOrSeparate<"external:I">; def _SLASH_FA_joined : CLJoined<"FA">; def _SLASH_favor : CLJoined<"favor">; def _SLASH_fsanitize_address_use_after_return : CLJoined<"fsanitize-address-use-after-return">; diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 164765ca1a1a3..c424e79a971cf 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -2117,6 +2117,26 @@ struct FormatStyle { /// For example: BOOST_FOREACH. std::vector ForEachMacros; + /// A vector of macros that should be interpreted as conditionals + /// instead of as function calls. + /// + /// These are expected to be macros of the form: + /// \code + /// IF(...) + /// + /// else IF(...) + /// + /// \endcode + /// + /// In the .clang-format configuration file, this can be configured like: + /// \code{.yaml} + /// IfMacros: ['IF'] + /// \endcode + /// + /// For example: `KJ_IF_MAYBE + /// `_ + std::vector IfMacros; + /// \brief A vector of macros that should be interpreted as type declarations /// instead of as function calls. /// @@ -2473,6 +2493,8 @@ struct FormatStyle { LK_Java, /// Should be used for JavaScript. LK_JavaScript, + /// Should be used for JSON. 
+ LK_Json, /// Should be used for Objective-C, Objective-C++. LK_ObjC, /// Should be used for Protocol Buffers @@ -2486,10 +2508,43 @@ struct FormatStyle { }; bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } bool isCSharp() const { return Language == LK_CSharp; } + bool isJson() const { return Language == LK_Json; } /// Language, this format style is targeted at. LanguageKind Language; + /// Indentation logic for lambda bodies. + enum LambdaBodyIndentationKind : unsigned char { + /// Align lambda body relative to the lambda signature. This is the default. + /// \code + /// someMethod( + /// [](SomeReallyLongLambdaSignatureArgument foo) { + /// return; + /// }); + /// \endcode + LBI_Signature, + /// Align lambda body relative to the indentation level of the outer scope + /// the lambda signature resides in. + /// \code + /// someMethod( + /// [](SomeReallyLongLambdaSignatureArgument foo) { + /// return; + /// }); + /// \endcode + LBI_OuterScope, + }; + + /// The indentation style of lambda bodies. ``Signature`` (the default) + /// causes the lambda body to be indented one additional level relative to + /// the indentation level of the signature. ``OuterScope`` forces the lambda + /// body to be indented one additional level relative to the parent scope + /// containing the lambda signature. For callback-heavy code, it may improve + /// readability to have the signature indented two levels and to use + /// ``OuterScope``. The KJ style guide requires ``OuterScope``. + /// `KJ style guide + /// `_ + LambdaBodyIndentationKind LambdaBodyIndentation; + /// A regular expression matching macros that start a block. /// \code /// # With: @@ -2673,7 +2728,7 @@ struct FormatStyle { /// (counted relative to leading non-whitespace column). unsigned PenaltyIndentedWhitespace; - /// The ``&`` and ``*`` alignment style. + /// The ``&``, ``&&`` and ``*`` alignment style. enum PointerAlignmentStyle : unsigned char { /// Align pointer to the left. 
/// \code @@ -2768,6 +2823,31 @@ struct FormatStyle { /// \endcode std::vector RawStringFormats; + /// \brief The ``&`` and ``&&`` alignment style. + enum ReferenceAlignmentStyle { + /// Align reference like ``PointerAlignment``. + RAS_Pointer, + /// Align reference to the left. + /// \code + /// int& a; + /// \endcode + RAS_Left, + /// Align reference to the right. + /// \code + /// int &a; + /// \endcode + RAS_Right, + /// Align reference in the middle. + /// \code + /// int & a; + /// \endcode + RAS_Middle + }; + + /// \brief Reference alignment style (overrides ``PointerAlignment`` for + /// references). + ReferenceAlignmentStyle ReferenceAlignment; + // clang-format off /// If ``true``, clang-format will attempt to re-flow comments. /// \code @@ -3001,8 +3081,10 @@ struct FormatStyle { /// \endcode SBPO_ControlStatements, /// Same as ``SBPO_ControlStatements`` except this option doesn't apply to - /// ForEach macros. This is useful in projects where ForEach macros are - /// treated as function calls instead of control statements. + /// ForEach and If macros. This is useful in projects where ForEach/If + /// macros are treated as function calls instead of control statements. + /// ``SBPO_ControlStatementsExceptForEachMacros`` remains an alias for + /// backward compatibility. /// \code /// void f() { /// Q_FOREACH(...) { @@ -3010,7 +3092,7 @@ struct FormatStyle { /// } /// } /// \endcode - SBPO_ControlStatementsExceptForEachMacros, + SBPO_ControlStatementsExceptControlMacros, /// Put a space before opening parentheses only if the parentheses are not /// empty i.e.
'()' /// \code @@ -3377,6 +3459,7 @@ struct FormatStyle { JavaScriptWrapImports == R.JavaScriptWrapImports && KeepEmptyLinesAtTheStartOfBlocks == R.KeepEmptyLinesAtTheStartOfBlocks && + LambdaBodyIndentation == R.LambdaBodyIndentation && MacroBlockBegin == R.MacroBlockBegin && MacroBlockEnd == R.MacroBlockEnd && MaxEmptyLinesToKeep == R.MaxEmptyLinesToKeep && @@ -3400,6 +3483,7 @@ struct FormatStyle { R.PenaltyBreakTemplateDeclaration && PointerAlignment == R.PointerAlignment && RawStringFormats == R.RawStringFormats && + ReferenceAlignment == R.ReferenceAlignment && ShortNamespaceLines == R.ShortNamespaceLines && SortIncludes == R.SortIncludes && SortJavaStaticImport == R.SortJavaStaticImport && @@ -3715,6 +3799,8 @@ inline StringRef getLanguageName(FormatStyle::LanguageKind Language) { return "Java"; case FormatStyle::LK_JavaScript: return "JavaScript"; + case FormatStyle::LK_Json: + return "Json"; case FormatStyle::LK_Proto: return "Proto"; case FormatStyle::LK_TableGen: diff --git a/clang/include/clang/Sema/Overload.h b/clang/include/clang/Sema/Overload.h index 699c3e8088726..82661cb3d12ac 100644 --- a/clang/include/clang/Sema/Overload.h +++ b/clang/include/clang/Sema/Overload.h @@ -1048,9 +1048,6 @@ class Sema; void destroyCandidates(); - /// Whether diagnostics should be deferred. - bool shouldDeferDiags(Sema &S, ArrayRef Args, SourceLocation OpLoc); - public: OverloadCandidateSet(SourceLocation Loc, CandidateSetKind CSK, OperatorRewriteInfo RewriteInfo = {}) @@ -1063,6 +1060,9 @@ class Sema; CandidateSetKind getKind() const { return Kind; } OperatorRewriteInfo getRewriteInfo() const { return RewriteInfo; } + /// Whether diagnostics should be deferred. + bool shouldDeferDiags(Sema &S, ArrayRef Args, SourceLocation OpLoc); + /// Determine when this overload candidate will be new to the /// overload set. 
bool isNewCandidate(Decl *F, OverloadCandidateParamOrder PO = diff --git a/clang/include/clang/Sema/Sema.h b/clang/include/clang/Sema/Sema.h index 4d2c528ec7cf6..9fda2ef3b777b 100644 --- a/clang/include/clang/Sema/Sema.h +++ b/clang/include/clang/Sema/Sema.h @@ -1989,6 +1989,22 @@ class Sema final { /// Build a partial diagnostic. PartialDiagnostic PDiag(unsigned DiagID = 0); // in SemaInternal.h + /// Whether deferrable diagnostics should be deferred. + bool DeferDiags = false; + + /// RAII class to control scope of DeferDiags. + class DeferDiagsRAII { + Sema &S; + bool SavedDeferDiags = false; + + public: + DeferDiagsRAII(Sema &S, bool DeferDiags) + : S(S), SavedDeferDiags(S.DeferDiags) { + S.DeferDiags = DeferDiags; + } + ~DeferDiagsRAII() { S.DeferDiags = SavedDeferDiags; } + }; + /// Whether uncompilable error has occurred. This includes error happens /// in deferred diagnostics. bool hasUncompilableErrorOccurred() const; @@ -4356,7 +4372,8 @@ class Sema final { bool RValueThis, unsigned ThisQuals); CXXDestructorDecl *LookupDestructor(CXXRecordDecl *Class); - bool checkLiteralOperatorId(const CXXScopeSpec &SS, const UnqualifiedId &Id); + bool checkLiteralOperatorId(const CXXScopeSpec &SS, const UnqualifiedId &Id, + bool IsUDSuffix); LiteralOperatorLookupResult LookupLiteralOperator(Scope *S, LookupResult &R, ArrayRef ArgTys, bool AllowRaw, bool AllowTemplate, diff --git a/clang/include/clang/Serialization/ASTBitCodes.h b/clang/include/clang/Serialization/ASTBitCodes.h index e3a9c89a1b946..dd5a33480e54f 100644 --- a/clang/include/clang/Serialization/ASTBitCodes.h +++ b/clang/include/clang/Serialization/ASTBitCodes.h @@ -41,7 +41,7 @@ namespace serialization { /// Version 4 of AST files also requires that the version control branch and /// revision match exactly, since there is no backward compatibility of /// AST files at this time. 
-const unsigned VERSION_MAJOR = 13; +const unsigned VERSION_MAJOR = 14; /// AST file minor version number supported by this version of /// Clang. diff --git a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h index a4957c697c0ae..bf00fd98a4616 100644 --- a/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h +++ b/clang/include/clang/StaticAnalyzer/Core/PathSensitive/RangedConstraintManager.h @@ -384,6 +384,11 @@ class RangedConstraintManager : public SimpleConstraintManager { static void computeAdjustment(SymbolRef &Sym, llvm::APSInt &Adjustment); }; +/// Try to simplify a given symbolic expression's associated value based on the +/// constraints in State. This is needed because the Environment bindings are +/// not getting updated when a new constraint is added to the State. +SymbolRef simplify(ProgramStateRef State, SymbolRef Sym); + } // namespace ento } // namespace clang diff --git a/clang/lib/AST/ASTContext.cpp b/clang/lib/AST/ASTContext.cpp index 0b4be365441a2..27c4e82a1285d 100644 --- a/clang/lib/AST/ASTContext.cpp +++ b/clang/lib/AST/ASTContext.cpp @@ -8021,19 +8021,21 @@ static TypedefDecl *CreateVoidPtrBuiltinVaListDecl(const ASTContext *Context) { static TypedefDecl * CreateAArch64ABIBuiltinVaListDecl(const ASTContext *Context) { - // struct __va_list RecordDecl *VaListTagDecl = Context->buildImplicitRecord("__va_list"); - if (Context->getLangOpts().CPlusPlus) { - // namespace std { struct __va_list { - NamespaceDecl *NS; - NS = NamespaceDecl::Create(const_cast(*Context), - Context->getTranslationUnitDecl(), - /*Inline*/ false, SourceLocation(), - SourceLocation(), &Context->Idents.get("std"), - /*PrevDecl*/ nullptr); - NS->setImplicit(); - VaListTagDecl->setDeclContext(NS); - } + // namespace std { struct __va_list { + // Note that we create the namespace even in C. 
This is intentional so that + // the type is consistent between C and C++, which is important in cases where + // the types need to match between translation units (e.g. with + // -fsanitize=cfi-icall). Ideally we wouldn't have created this namespace at + // all, but it's now part of the ABI (e.g. in mangled names), so we can't + // change it. + auto *NS = NamespaceDecl::Create( + const_cast(*Context), Context->getTranslationUnitDecl(), + /*Inline*/ false, SourceLocation(), SourceLocation(), + &Context->Idents.get("std"), + /*PrevDecl*/ nullptr); + NS->setImplicit(); + VaListTagDecl->setDeclContext(NS); VaListTagDecl->startDefinition(); diff --git a/clang/lib/AST/Decl.cpp b/clang/lib/AST/Decl.cpp index 090accf48561e..bed0364094797 100644 --- a/clang/lib/AST/Decl.cpp +++ b/clang/lib/AST/Decl.cpp @@ -1081,10 +1081,9 @@ bool NamedDecl::isLinkageValid() const { ReservedIdentifierStatus NamedDecl::isReserved(const LangOptions &LangOpts) const { const IdentifierInfo *II = getIdentifier(); - if (!II) - if (const auto *FD = dyn_cast(this)) - II = FD->getLiteralIdentifier(); + // This triggers at least for CXXLiteralIdentifiers, which we already checked + // at lexing time. if (!II) return ReservedIdentifierStatus::NotReserved; @@ -4585,6 +4584,13 @@ RecordDecl::field_iterator RecordDecl::field_begin() const { void RecordDecl::completeDefinition() { assert(!isCompleteDefinition() && "Cannot redefine record!"); TagDecl::completeDefinition(); + + ASTContext &Ctx = getASTContext(); + + // Layouts are dumped when computed, so if we are dumping for all complete + // types, we need to force usage to get types that wouldn't be used elsewhere. + if (Ctx.getLangOpts().DumpRecordLayoutsComplete) + (void)Ctx.getASTRecordLayout(this); } /// isMsStruct - Get whether or not this record uses ms_struct layout. 
diff --git a/clang/lib/AST/ExprCXX.cpp b/clang/lib/AST/ExprCXX.cpp index b5ccfe34cce1b..26844f412f366 100644 --- a/clang/lib/AST/ExprCXX.cpp +++ b/clang/lib/AST/ExprCXX.cpp @@ -275,7 +275,8 @@ CXXNewExpr *CXXNewExpr::CreateEmpty(const ASTContext &Ctx, bool IsArray, } bool CXXNewExpr::shouldNullCheckAllocation() const { - return getOperatorNew() + return !getOperatorNew()->hasAttr() && + getOperatorNew() ->getType() ->castAs() ->isNothrow() && diff --git a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp index f6fdbe868e2df..40db70e6f4a51 100644 --- a/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp +++ b/clang/lib/ASTMatchers/Dynamic/Marshallers.cpp @@ -20,7 +20,7 @@ getBestGuess(llvm::StringRef Search, llvm::ArrayRef Allowed, ++MaxEditDistance; llvm::StringRef Res; for (const llvm::StringRef &Item : Allowed) { - if (Item.equals_lower(Search)) { + if (Item.equals_insensitive(Search)) { assert(!Item.equals(Search) && "This should be handled earlier on."); MaxEditDistance = 1; Res = Item; @@ -40,7 +40,7 @@ getBestGuess(llvm::StringRef Search, llvm::ArrayRef Allowed, auto NoPrefix = Item; if (!NoPrefix.consume_front(DropPrefix)) continue; - if (NoPrefix.equals_lower(Search)) { + if (NoPrefix.equals_insensitive(Search)) { if (NoPrefix.equals(Search)) return Item.str(); MaxEditDistance = 1; diff --git a/clang/lib/Analysis/CalledOnceCheck.cpp b/clang/lib/Analysis/CalledOnceCheck.cpp index db094129a9608..661f7b999f2b9 100644 --- a/clang/lib/Analysis/CalledOnceCheck.cpp +++ b/clang/lib/Analysis/CalledOnceCheck.cpp @@ -478,7 +478,7 @@ bool mentionsAnyOfConventionalNames(const Expr *E) { return llvm::any_of( CONVENTIONAL_CONDITIONS, [ConditionName](const llvm::StringLiteral &Conventional) { - return ConditionName.contains_lower(Conventional); + return ConditionName.contains_insensitive(Conventional); }); }); } diff --git a/clang/lib/Analysis/RetainSummaryManager.cpp b/clang/lib/Analysis/RetainSummaryManager.cpp index 
58ead1dfe3522..7ed1e40333f43 100644 --- a/clang/lib/Analysis/RetainSummaryManager.cpp +++ b/clang/lib/Analysis/RetainSummaryManager.cpp @@ -189,20 +189,22 @@ static bool hasRCAnnotation(const Decl *D, StringRef rcAnnotation) { } static bool isRetain(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("retain") || FName.endswith_lower("retain"); + return FName.startswith_insensitive("retain") || + FName.endswith_insensitive("retain"); } static bool isRelease(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("release") || FName.endswith_lower("release"); + return FName.startswith_insensitive("release") || + FName.endswith_insensitive("release"); } static bool isAutorelease(const FunctionDecl *FD, StringRef FName) { - return FName.startswith_lower("autorelease") || - FName.endswith_lower("autorelease"); + return FName.startswith_insensitive("autorelease") || + FName.endswith_insensitive("autorelease"); } static bool isMakeCollectable(StringRef FName) { - return FName.contains_lower("MakeCollectable"); + return FName.contains_insensitive("MakeCollectable"); } /// A function is OSObject related if it is declared on a subclass @@ -1100,7 +1102,7 @@ RetainSummaryManager::getStandardMethodSummary(const ObjCMethodDecl *MD, if (S.isKeywordSelector()) { for (unsigned i = 0, e = S.getNumArgs(); i != e; ++i) { StringRef Slot = S.getNameForSlot(i); - if (Slot.substr(Slot.size() - 8).equals_lower("delegate")) { + if (Slot.substr(Slot.size() - 8).equals_insensitive("delegate")) { if (ResultEff == ObjCInitRetE) ResultEff = RetEffect::MakeNoRetHard(); else diff --git a/clang/lib/Basic/Cuda.cpp b/clang/lib/Basic/Cuda.cpp index fa7f78a25829f..766135bcb376f 100644 --- a/clang/lib/Basic/Cuda.cpp +++ b/clang/lib/Basic/Cuda.cpp @@ -111,6 +111,7 @@ static const CudaArchToStringMap arch_names[] = { GFX(1032), // gfx1032 GFX(1033), // gfx1033 GFX(1034), // gfx1034 + GFX(1035), // gfx1035 // clang-format on }; #undef SM diff --git 
a/clang/lib/Basic/FileManager.cpp b/clang/lib/Basic/FileManager.cpp index d39d7ba22643b..df306bd83136e 100644 --- a/clang/lib/Basic/FileManager.cpp +++ b/clang/lib/Basic/FileManager.cpp @@ -128,7 +128,7 @@ FileManager::getDirectoryRef(StringRef DirName, bool CacheFailure) { // Stat("C:") does not recognize "C:" as a valid directory std::string DirNameStr; if (DirName.size() > 1 && DirName.back() == ':' && - DirName.equals_lower(llvm::sys::path::root_name(DirName))) { + DirName.equals_insensitive(llvm::sys::path::root_name(DirName))) { DirNameStr = DirName.str() + '.'; DirName = DirNameStr; } diff --git a/clang/lib/Basic/Targets/AMDGPU.cpp b/clang/lib/Basic/Targets/AMDGPU.cpp index 8a8165e9c895c..595132e2e70ba 100644 --- a/clang/lib/Basic/Targets/AMDGPU.cpp +++ b/clang/lib/Basic/Targets/AMDGPU.cpp @@ -184,6 +184,7 @@ bool AMDGPUTargetInfo::initFeatureMap( // XXX - What does the member GPU mean if device name string passed here? if (isAMDGCN(getTriple())) { switch (llvm::AMDGPU::parseArchAMDGCN(CPU)) { + case GK_GFX1035: case GK_GFX1034: case GK_GFX1033: case GK_GFX1032: diff --git a/clang/lib/Basic/Targets/NVPTX.cpp b/clang/lib/Basic/Targets/NVPTX.cpp index 95abf46c08f7f..ae546492a8bb3 100644 --- a/clang/lib/Basic/Targets/NVPTX.cpp +++ b/clang/lib/Basic/Targets/NVPTX.cpp @@ -215,6 +215,7 @@ void NVPTXTargetInfo::getTargetDefines(const LangOptions &Opts, case CudaArch::GFX1032: case CudaArch::GFX1033: case CudaArch::GFX1034: + case CudaArch::GFX1035: case CudaArch::LAST: break; case CudaArch::UNUSED: diff --git a/clang/lib/Basic/Targets/PPC.cpp b/clang/lib/Basic/Targets/PPC.cpp index 3dfc0c0751e44..6860b5e5d02fa 100644 --- a/clang/lib/Basic/Targets/PPC.cpp +++ b/clang/lib/Basic/Targets/PPC.cpp @@ -97,6 +97,17 @@ static void defineXLCompatMacros(MacroBuilder &Builder) { Builder.defineMacro("__dcbtst", "__builtin_ppc_dcbtst"); Builder.defineMacro("__dcbz", "__builtin_ppc_dcbz"); Builder.defineMacro("__icbt", "__builtin_ppc_icbt"); + 
Builder.defineMacro("__compare_and_swap", "__builtin_ppc_compare_and_swap"); + Builder.defineMacro("__compare_and_swaplp", + "__builtin_ppc_compare_and_swaplp"); + Builder.defineMacro("__fetch_and_add", "__builtin_ppc_fetch_and_add"); + Builder.defineMacro("__fetch_and_addlp", "__builtin_ppc_fetch_and_addlp"); + Builder.defineMacro("__fetch_and_and", "__builtin_ppc_fetch_and_and"); + Builder.defineMacro("__fetch_and_andlp", "__builtin_ppc_fetch_and_andlp"); + Builder.defineMacro("__fetch_and_or", "__builtin_ppc_fetch_and_or"); + Builder.defineMacro("__fetch_and_orlp", "__builtin_ppc_fetch_and_orlp"); + Builder.defineMacro("__fetch_and_swap", "__builtin_ppc_fetch_and_swap"); + Builder.defineMacro("__fetch_and_swaplp", "__builtin_ppc_fetch_and_swaplp"); } /// PPCTargetInfo::getTargetDefines - Return a set of the PowerPC-specific diff --git a/clang/lib/CodeGen/CGBlocks.cpp b/clang/lib/CodeGen/CGBlocks.cpp index 9956d125d514e..769501a036e60 100644 --- a/clang/lib/CodeGen/CGBlocks.cpp +++ b/clang/lib/CodeGen/CGBlocks.cpp @@ -1948,21 +1948,13 @@ CodeGenFunction::GenerateCopyHelperFunction(const CGBlockInfo &blockInfo) { if (CGM.supportsCOMDAT()) Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); - IdentifierInfo *II = &C.Idents.get(FuncName); - SmallVector ArgTys; ArgTys.push_back(C.VoidPtrTy); ArgTys.push_back(C.VoidPtrTy); - QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); - FunctionDecl *FD = FunctionDecl::Create( - C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - FunctionTy, nullptr, SC_Static, false, false); setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); - // This is necessary to avoid inheriting the previous line number. 
- FD->setImplicit(); - StartFunction(FD, ReturnTy, Fn, FI, args); + StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args); auto AL = ApplyDebugLocation::CreateArtificial(*this); llvm::Type *structPtrTy = blockInfo.StructureType->getPointerTo(); @@ -2143,21 +2135,12 @@ CodeGenFunction::GenerateDestroyHelperFunction(const CGBlockInfo &blockInfo) { if (CGM.supportsCOMDAT()) Fn->setComdat(CGM.getModule().getOrInsertComdat(FuncName)); - IdentifierInfo *II = &C.Idents.get(FuncName); - SmallVector ArgTys; ArgTys.push_back(C.VoidPtrTy); - QualType FunctionTy = C.getFunctionType(ReturnTy, ArgTys, {}); - - FunctionDecl *FD = FunctionDecl::Create( - C, C.getTranslationUnitDecl(), SourceLocation(), SourceLocation(), II, - FunctionTy, nullptr, SC_Static, false, false); setBlockHelperAttributesVisibility(blockInfo.CapturesNonExternalType, Fn, FI, CGM); - // This is necessary to avoid inheriting the previous line number. - FD->setImplicit(); - StartFunction(FD, ReturnTy, Fn, FI, args); + StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args); markAsIgnoreThreadCheckingAtRuntime(Fn); auto AL = ApplyDebugLocation::CreateArtificial(*this); @@ -2395,21 +2378,15 @@ generateByrefCopyHelper(CodeGenFunction &CGF, const BlockByrefInfo &byrefInfo, llvm::Function::Create(LTy, llvm::GlobalValue::InternalLinkage, "__Block_byref_object_copy_", &CGF.CGM.getModule()); - IdentifierInfo *II - = &Context.Idents.get("__Block_byref_object_copy_"); - SmallVector ArgTys; ArgTys.push_back(Context.VoidPtrTy); ArgTys.push_back(Context.VoidPtrTy); - QualType FunctionTy = Context.getFunctionType(ReturnTy, ArgTys, {}); - - FunctionDecl *FD = FunctionDecl::Create( - Context, Context.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - CGF.StartFunction(FD, ReturnTy, Fn, FI, args); + CGF.StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args); + // Create a scope with an artificial location for 
the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(CGF); if (generator.needsCopy()) { llvm::Type *byrefPtrType = byrefInfo.Type->getPointerTo(0); @@ -2471,20 +2448,14 @@ generateByrefDisposeHelper(CodeGenFunction &CGF, "__Block_byref_object_dispose_", &CGF.CGM.getModule()); - IdentifierInfo *II - = &Context.Idents.get("__Block_byref_object_dispose_"); - SmallVector ArgTys; ArgTys.push_back(Context.VoidPtrTy); - QualType FunctionTy = Context.getFunctionType(R, ArgTys, {}); - - FunctionDecl *FD = FunctionDecl::Create( - Context, Context.getTranslationUnitDecl(), SourceLocation(), - SourceLocation(), II, FunctionTy, nullptr, SC_Static, false, false); CGF.CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FI); - CGF.StartFunction(FD, R, Fn, FI, args); + CGF.StartFunction(GlobalDecl(), R, Fn, FI, args); + // Create a scope with an artificial location for the body of this function. + auto AL = ApplyDebugLocation::CreateArtificial(CGF); if (generator.needsDispose()) { Address addr = CGF.GetAddrOfLocalVar(&Src); diff --git a/clang/lib/CodeGen/CGBuiltin.cpp b/clang/lib/CodeGen/CGBuiltin.cpp index 04b4ee6bd3185..ef79d82c0473c 100644 --- a/clang/lib/CodeGen/CGBuiltin.cpp +++ b/clang/lib/CodeGen/CGBuiltin.cpp @@ -13382,8 +13382,8 @@ Value *CodeGenFunction::EmitX86BuiltinExpr(unsigned BuiltinID, cast(Ops[0]->getType())->getNumElements(); unsigned ShiftVal = cast(Ops[2])->getZExtValue() & 0xff; - // Mask the shift amount to width of two vectors. - ShiftVal &= (2 * NumElts) - 1; + // Mask the shift amount to width of a vector. 
+ ShiftVal &= NumElts - 1; int Indices[16]; for (unsigned i = 0; i != NumElts; ++i) @@ -15433,6 +15433,40 @@ Value *CodeGenFunction::EmitPPCBuiltinExpr(unsigned BuiltinID, Value *Call = Builder.CreateCall(F, CallOps); return Builder.CreateAlignedStore(Call, Ops[0], MaybeAlign(64)); } + + case PPC::BI__builtin_ppc_compare_and_swap: + case PPC::BI__builtin_ppc_compare_and_swaplp: { + Address Addr = EmitPointerWithAlignment(E->getArg(0)); + Address OldValAddr = EmitPointerWithAlignment(E->getArg(1)); + Value *OldVal = Builder.CreateLoad(OldValAddr); + QualType AtomicTy = E->getArg(0)->getType()->getPointeeType(); + LValue LV = MakeAddrLValue(Addr, AtomicTy); + auto Pair = EmitAtomicCompareExchange( + LV, RValue::get(OldVal), RValue::get(Ops[2]), E->getExprLoc(), + llvm::AtomicOrdering::Monotonic, llvm::AtomicOrdering::Monotonic, true); + return Pair.second; + } + case PPC::BI__builtin_ppc_fetch_and_add: + case PPC::BI__builtin_ppc_fetch_and_addlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Add, E, + llvm::AtomicOrdering::Monotonic); + } + case PPC::BI__builtin_ppc_fetch_and_and: + case PPC::BI__builtin_ppc_fetch_and_andlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::And, E, + llvm::AtomicOrdering::Monotonic); + } + + case PPC::BI__builtin_ppc_fetch_and_or: + case PPC::BI__builtin_ppc_fetch_and_orlp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Or, E, + llvm::AtomicOrdering::Monotonic); + } + case PPC::BI__builtin_ppc_fetch_and_swap: + case PPC::BI__builtin_ppc_fetch_and_swaplp: { + return MakeBinaryAtomicValue(*this, AtomicRMWInst::Xchg, E, + llvm::AtomicOrdering::Monotonic); + } } } diff --git a/clang/lib/CodeGen/CGDebugInfo.cpp b/clang/lib/CodeGen/CGDebugInfo.cpp index 742b2e90b6a4f..c10a1e585f6bc 100644 --- a/clang/lib/CodeGen/CGDebugInfo.cpp +++ b/clang/lib/CodeGen/CGDebugInfo.cpp @@ -578,6 +578,9 @@ void CGDebugInfo::CreateCompileUnit() { LangTag = llvm::dwarf::DW_LANG_C_plus_plus; } else if (LO.ObjC) { LangTag = 
llvm::dwarf::DW_LANG_ObjC; + } else if (LO.OpenCL && (!CGM.getCodeGenOpts().DebugStrictDwarf || + CGM.getCodeGenOpts().DwarfVersion >= 5)) { + LangTag = llvm::dwarf::DW_LANG_OpenCL; } else if (LO.RenderScript) { LangTag = llvm::dwarf::DW_LANG_GOOGLE_RenderScript; } else if (LO.C99) { diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp index e0f9f5449b64b..2e79e2978d3da 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp @@ -1102,17 +1102,6 @@ void CGOpenMPRuntimeGPU::emitNonSPMDKernel(const OMPExecutableDirective &D, } Action(EST, WST); CodeGen.setAction(Action); IsInTTDRegion = true; - // Reserve place for the globalized memory. - GlobalizedRecords.emplace_back(); - if (!KernelStaticGlobalized) { - KernelStaticGlobalized = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, - llvm::GlobalValue::InternalLinkage, - llvm::UndefValue::get(CGM.VoidPtrTy), - "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, - llvm::GlobalValue::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); - } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); IsInTTDRegion = false; @@ -1164,10 +1153,6 @@ void CGOpenMPRuntimeGPU::emitNonSPMDEntryHeader(CodeGenFunction &CGF, CGM.getModule(), OMPRTL___kmpc_kernel_init), Args); - // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack)); - emitGenericVarsProlog(CGF, WST.Loc); } @@ -1236,17 +1221,6 @@ void CGOpenMPRuntimeGPU::emitSPMDKernel(const OMPExecutableDirective &D, } Action(*this, EST, D); CodeGen.setAction(Action); IsInTTDRegion = true; - // Reserve place for the globalized memory. 
- GlobalizedRecords.emplace_back(); - if (!KernelStaticGlobalized) { - KernelStaticGlobalized = new llvm::GlobalVariable( - CGM.getModule(), CGM.VoidPtrTy, /*isConstant=*/false, - llvm::GlobalValue::InternalLinkage, - llvm::UndefValue::get(CGM.VoidPtrTy), - "_openmp_kernel_static_glob_rd$ptr", /*InsertBefore=*/nullptr, - llvm::GlobalValue::NotThreadLocal, - CGM.getContext().getTargetAddressSpace(LangAS::cuda_shared)); - } emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID, IsOffloadEntry, CodeGen); IsInTTDRegion = false; @@ -1268,12 +1242,6 @@ void CGOpenMPRuntimeGPU::emitSPMDEntryHeader( CGM.getModule(), OMPRTL___kmpc_spmd_kernel_init), Args); - if (RequiresFullRuntime) { - // For data sharing, we need to initialize the stack. - CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_init_stack_spmd)); - } - CGF.EmitBranch(ExecuteBB); CGF.EmitBlock(ExecuteBB); @@ -1679,16 +1647,13 @@ llvm::Function *CGOpenMPRuntimeGPU::emitTeamsOutlinedFunction( static_cast(CGF.CGM.getOpenMPRuntime()); if (GlobalizedRD) { auto I = Rt.FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; - I->getSecond().GlobalRecord = GlobalizedRD; I->getSecond().MappedParams = std::make_unique(); DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const auto &Pair : MappedDeclsFields) { assert(Pair.getFirst()->isCanonicalDecl() && "Expected canonical declaration"); - Data.insert(std::make_pair(Pair.getFirst(), - MappedVarData(Pair.getSecond(), - /*IsOnePerTeam=*/true))); + Data.insert(std::make_pair(Pair.getFirst(), MappedVarData())); } } Rt.emitGenericVarsProlog(CGF, Loc); @@ -1717,282 +1682,69 @@ void CGOpenMPRuntimeGPU::emitGenericVarsProlog(CodeGenFunction &CGF, const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I == FunctionGlobalizedDecls.end()) return; - if (const RecordDecl *GlobalizedVarsRecord = I->getSecond().GlobalRecord) { - QualType GlobalRecTy = 
CGM.getContext().getRecordType(GlobalizedVarsRecord); - QualType SecGlobalRecTy; - // Recover pointer to this function's global record. The runtime will - // handle the specifics of the allocation of the memory. - // Use actual memory size of the record including the padding - // for alignment purposes. - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); - unsigned GlobalRecordSize = - CGM.getContext().getTypeSizeInChars(GlobalRecTy).getQuantity(); - GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); - - llvm::PointerType *GlobalRecPtrTy = - CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo(); - llvm::Value *GlobalRecCastAddr; - llvm::Value *IsTTD = nullptr; - if (!IsInTTDRegion && - (WithSPMDCheck || - getExecutionMode() == CGOpenMPRuntimeGPU::EM_Unknown)) { - llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); - llvm::BasicBlock *SPMDBB = CGF.createBasicBlock(".spmd"); - llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); - if (I->getSecond().SecondaryGlobalRecord.hasValue()) { - llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc); - llvm::Value *ThreadID = getThreadID(CGF, Loc); - llvm::Value *PL = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), - OMPRTL___kmpc_parallel_level), - {RTLoc, ThreadID}); - IsTTD = Bld.CreateIsNull(PL); - } - llvm::Value *IsSPMD = Bld.CreateIsNotNull( - CGF.EmitNounwindRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_is_spmd_exec_mode))); - Bld.CreateCondBr(IsSPMD, SPMDBB, NonSPMDBB); - // There is no need to emit line number for unconditional branch. - (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(SPMDBB); - Address RecPtr = Address(llvm::ConstantPointerNull::get(GlobalRecPtrTy), - CharUnits::fromQuantity(Alignment)); - CGF.EmitBranch(ExitBB); - // There is no need to emit line number for unconditional branch. 
- (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(NonSPMDBB); - llvm::Value *Size = llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize); - if (const RecordDecl *SecGlobalizedVarsRecord = - I->getSecond().SecondaryGlobalRecord.getValueOr(nullptr)) { - SecGlobalRecTy = - CGM.getContext().getRecordType(SecGlobalizedVarsRecord); - - // Recover pointer to this function's global record. The runtime will - // handle the specifics of the allocation of the memory. - // Use actual memory size of the record including the padding - // for alignment purposes. - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(SecGlobalRecTy).getQuantity(); - unsigned GlobalRecordSize = - CGM.getContext().getTypeSizeInChars(SecGlobalRecTy).getQuantity(); - GlobalRecordSize = llvm::alignTo(GlobalRecordSize, Alignment); - Size = Bld.CreateSelect( - IsTTD, llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), Size); - } - // TODO: allow the usage of shared memory to be controlled by - // the user, for now, default to global. 
- llvm::Value *GlobalRecordSizeArg[] = { - Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), - GlobalRecordSizeArg); - GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, GlobalRecPtrTy); - CGF.EmitBlock(ExitBB); - auto *Phi = Bld.CreatePHI(GlobalRecPtrTy, - /*NumReservedValues=*/2, "_select_stack"); - Phi->addIncoming(RecPtr.getPointer(), SPMDBB); - Phi->addIncoming(GlobalRecCastAddr, NonSPMDBB); - GlobalRecCastAddr = Phi; - I->getSecond().GlobalRecordAddr = Phi; - I->getSecond().IsInSPMDModeFlag = IsSPMD; - } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { - assert(GlobalizedRecords.back().Records.size() < 2 && - "Expected less than 2 globalized records: one for target and one " - "for teams."); - unsigned Offset = 0; - for (const RecordDecl *RD : GlobalizedRecords.back().Records) { - QualType RDTy = CGM.getContext().getRecordType(RD); - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(RDTy).getQuantity(); - unsigned Size = CGM.getContext().getTypeSizeInChars(RDTy).getQuantity(); - Offset = - llvm::alignTo(llvm::alignTo(Offset, Alignment) + Size, Alignment); - } - unsigned Alignment = - CGM.getContext().getTypeAlignInChars(GlobalRecTy).getQuantity(); - Offset = llvm::alignTo(Offset, Alignment); - GlobalizedRecords.back().Records.push_back(GlobalizedVarsRecord); - ++GlobalizedRecords.back().RegionCounter; - if (GlobalizedRecords.back().Records.size() == 1) { - assert(KernelStaticGlobalized && - "Kernel static pointer must be initialized already."); - auto *UseSharedMemory = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int16Ty, /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, nullptr, - "_openmp_static_kernel$is_shared"); - UseSharedMemory->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - QualType Int16Ty = 
CGM.getContext().getIntTypeForBitwidth( - /*DestWidth=*/16, /*Signed=*/0); - llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( - Address(UseSharedMemory, - CGM.getContext().getTypeAlignInChars(Int16Ty)), - /*Volatile=*/false, Int16Ty, Loc); - auto *StaticGlobalized = new llvm::GlobalVariable( - CGM.getModule(), CGM.Int8Ty, /*isConstant=*/false, - llvm::GlobalValue::CommonLinkage, nullptr); - auto *RecSize = new llvm::GlobalVariable( - CGM.getModule(), CGM.SizeTy, /*isConstant=*/true, - llvm::GlobalValue::InternalLinkage, nullptr, - "_openmp_static_kernel$size"); - RecSize->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global); - llvm::Value *Ld = CGF.EmitLoadOfScalar( - Address(RecSize, CGM.getSizeAlign()), /*Volatile=*/false, - CGM.getContext().getSizeType(), Loc); - llvm::Value *ResAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - KernelStaticGlobalized, CGM.VoidPtrPtrTy); - llvm::Value *GlobalRecordSizeArg[] = { - llvm::ConstantInt::get( - CGM.Int16Ty, - getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 
1 : 0), - StaticGlobalized, Ld, IsInSharedMemory, ResAddr}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_get_team_static_memory), - GlobalRecordSizeArg); - GlobalizedRecords.back().Buffer = StaticGlobalized; - GlobalizedRecords.back().RecSize = RecSize; - GlobalizedRecords.back().UseSharedMemory = UseSharedMemory; - GlobalizedRecords.back().Loc = Loc; - } - assert(KernelStaticGlobalized && "Global address must be set already."); - Address FrameAddr = CGF.EmitLoadOfPointer( - Address(KernelStaticGlobalized, CGM.getPointerAlign()), - CGM.getContext() - .getPointerType(CGM.getContext().VoidPtrTy) - .castAs()); - llvm::Value *GlobalRecValue = - Bld.CreateConstInBoundsGEP(FrameAddr, Offset).getPointer(); - I->getSecond().GlobalRecordAddr = GlobalRecValue; - I->getSecond().IsInSPMDModeFlag = nullptr; - GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(GlobalRecTy)->getPointerTo()); - } else { - // TODO: allow the usage of shared memory to be controlled by - // the user, for now, default to global. - bool UseSharedMemory = - IsInTTDRegion && GlobalRecordSize <= SharedMemorySize; - llvm::Value *GlobalRecordSizeArg[] = { - llvm::ConstantInt::get(CGM.SizeTy, GlobalRecordSize), - CGF.Builder.getInt16(UseSharedMemory ? 1 : 0)}; - llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), - IsInTTDRegion ? 
OMPRTL___kmpc_data_sharing_push_stack - : OMPRTL___kmpc_data_sharing_coalesced_push_stack), - GlobalRecordSizeArg); - GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, GlobalRecPtrTy); - I->getSecond().GlobalRecordAddr = GlobalRecValue; - I->getSecond().IsInSPMDModeFlag = nullptr; + for (auto &Rec : I->getSecond().LocalVarData) { + const auto *VD = cast(Rec.first); + bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); + QualType VarTy = VD->getType(); + + // Get the local allocation of a firstprivate variable before sharing + llvm::Value *ParValue; + if (EscapedParam) { + LValue ParLVal = + CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); + ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); } - LValue Base = - CGF.MakeNaturalAlignPointeeAddrLValue(GlobalRecCastAddr, GlobalRecTy); - - // Emit the "global alloca" which is a GEP from the global declaration - // record using the pointer returned by the runtime. - LValue SecBase; - decltype(I->getSecond().LocalVarData)::const_iterator SecIt; - if (IsTTD) { - SecIt = I->getSecond().SecondaryLocalVarData->begin(); - llvm::PointerType *SecGlobalRecPtrTy = - CGF.ConvertTypeForMem(SecGlobalRecTy)->getPointerTo(); - SecBase = CGF.MakeNaturalAlignPointeeAddrLValue( - Bld.CreatePointerBitCastOrAddrSpaceCast( - I->getSecond().GlobalRecordAddr, SecGlobalRecPtrTy), - SecGlobalRecTy); - } - for (auto &Rec : I->getSecond().LocalVarData) { - bool EscapedParam = I->getSecond().EscapedParameters.count(Rec.first); - llvm::Value *ParValue; - if (EscapedParam) { - const auto *VD = cast(Rec.first); - LValue ParLVal = - CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType()); - ParValue = CGF.EmitLoadOfScalar(ParLVal, Loc); - } - LValue VarAddr = CGF.EmitLValueForField(Base, Rec.second.FD); - // Emit VarAddr basing on lane-id if required. 
- QualType VarTy; - if (Rec.second.IsOnePerTeam) { - VarTy = Rec.second.FD->getType(); - } else { - Address Addr = VarAddr.getAddress(CGF); - llvm::Value *Ptr = CGF.Builder.CreateInBoundsGEP( - Addr.getElementType(), Addr.getPointer(), - {Bld.getInt32(0), getNVPTXLaneID(CGF)}); - VarTy = - Rec.second.FD->getType()->castAsArrayTypeUnsafe()->getElementType(); - VarAddr = CGF.MakeAddrLValue( - Address(Ptr, CGM.getContext().getDeclAlign(Rec.first)), VarTy, - AlignmentSource::Decl); - } - Rec.second.PrivateAddr = VarAddr.getAddress(CGF); - if (!IsInTTDRegion && - (WithSPMDCheck || - getExecutionMode() == CGOpenMPRuntimeGPU::EM_Unknown)) { - assert(I->getSecond().IsInSPMDModeFlag && - "Expected unknown execution mode or required SPMD check."); - if (IsTTD) { - assert(SecIt->second.IsOnePerTeam && - "Secondary glob data must be one per team."); - LValue SecVarAddr = CGF.EmitLValueForField(SecBase, SecIt->second.FD); - VarAddr.setAddress( - Address(Bld.CreateSelect(IsTTD, SecVarAddr.getPointer(CGF), - VarAddr.getPointer(CGF)), - VarAddr.getAlignment())); - Rec.second.PrivateAddr = VarAddr.getAddress(CGF); - } - Address GlobalPtr = Rec.second.PrivateAddr; - Address LocalAddr = CGF.CreateMemTemp(VarTy, Rec.second.FD->getName()); - Rec.second.PrivateAddr = Address( - Bld.CreateSelect(I->getSecond().IsInSPMDModeFlag, - LocalAddr.getPointer(), GlobalPtr.getPointer()), - LocalAddr.getAlignment()); - } - if (EscapedParam) { - const auto *VD = cast(Rec.first); - CGF.EmitStoreOfScalar(ParValue, VarAddr); - I->getSecond().MappedParams->setVarAddr(CGF, VD, - VarAddr.getAddress(CGF)); - } - if (IsTTD) - ++SecIt; + + // Allocate space for the variable to be globalized + llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())}; + llvm::Instruction *VoidPtr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_shared), + AllocArgs, VD->getName()); + + // Cast the void pointer and get the address of the globalized variable. 
+ llvm::PointerType *VarPtrTy = CGF.ConvertTypeForMem(VarTy)->getPointerTo(); + llvm::Value *CastedVoidPtr = Bld.CreatePointerBitCastOrAddrSpaceCast( + VoidPtr, VarPtrTy, VD->getName() + "_on_stack"); + LValue VarAddr = CGF.MakeNaturalAlignAddrLValue(CastedVoidPtr, VarTy); + Rec.second.PrivateAddr = VarAddr.getAddress(CGF); + Rec.second.GlobalizedVal = VoidPtr; + + // Assign the local allocation to the newly globalized location. + if (EscapedParam) { + CGF.EmitStoreOfScalar(ParValue, VarAddr); + I->getSecond().MappedParams->setVarAddr(CGF, VD, VarAddr.getAddress(CGF)); } + if (auto *DI = CGF.getDebugInfo()) + VoidPtr->setDebugLoc(DI->SourceLocToDebugLoc(VD->getLocation())); } - for (const ValueDecl *VD : I->getSecond().EscapedVariableLengthDecls) { - // Recover pointer to this function's global record. The runtime will - // handle the specifics of the allocation of the memory. - // Use actual memory size of the record including the padding + for (const auto *VD : I->getSecond().EscapedVariableLengthDecls) { + // Use actual memory size of the VLA object including the padding // for alignment purposes. - CGBuilderTy &Bld = CGF.Builder; llvm::Value *Size = CGF.getTypeSize(VD->getType()); CharUnits Align = CGM.getContext().getDeclAlign(VD); Size = Bld.CreateNUWAdd( Size, llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity() - 1)); llvm::Value *AlignVal = llvm::ConstantInt::get(CGF.SizeTy, Align.getQuantity()); + Size = Bld.CreateUDiv(Size, AlignVal); Size = Bld.CreateNUWMul(Size, AlignVal); - // TODO: allow the usage of shared memory to be controlled by - // the user, for now, default to global. 
- llvm::Value *GlobalRecordSizeArg[] = { - Size, CGF.Builder.getInt16(/*UseSharedMemory=*/0)}; - llvm::Value *GlobalRecValue = CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_coalesced_push_stack), - GlobalRecordSizeArg); - llvm::Value *GlobalRecCastAddr = Bld.CreatePointerBitCastOrAddrSpaceCast( - GlobalRecValue, CGF.ConvertTypeForMem(VD->getType())->getPointerTo()); - LValue Base = CGF.MakeAddrLValue(GlobalRecCastAddr, VD->getType(), + + // Allocate space for this VLA object to be globalized. + llvm::Value *AllocArgs[] = {CGF.getTypeSize(VD->getType())}; + llvm::Instruction *VoidPtr = + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_alloc_shared), + AllocArgs, VD->getName()); + + I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(VoidPtr); + LValue Base = CGF.MakeAddrLValue(VoidPtr, VD->getType(), CGM.getContext().getDeclAlign(VD), AlignmentSource::Decl); I->getSecond().MappedParams->setVarAddr(CGF, cast(VD), Base.getAddress(CGF)); - I->getSecond().EscapedVariableLengthDeclsAddrs.emplace_back(GlobalRecValue); } I->getSecond().MappedParams->apply(CGF); } @@ -2005,60 +1757,20 @@ void CGOpenMPRuntimeGPU::emitGenericVarsEpilog(CodeGenFunction &CGF, const auto I = FunctionGlobalizedDecls.find(CGF.CurFn); if (I != FunctionGlobalizedDecls.end()) { - I->getSecond().MappedParams->restore(CGF); - if (!CGF.HaveInsertPoint()) - return; + // Deallocate the memory for each globalized VLA object for (llvm::Value *Addr : llvm::reverse(I->getSecond().EscapedVariableLengthDeclsAddrs)) { - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), - Addr); + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free_shared), + Addr); } - if (I->getSecond().GlobalRecordAddr) { - if (!IsInTTDRegion && - (WithSPMDCheck || - getExecutionMode() == 
CGOpenMPRuntimeGPU::EM_Unknown)) { - CGBuilderTy &Bld = CGF.Builder; - llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".exit"); - llvm::BasicBlock *NonSPMDBB = CGF.createBasicBlock(".non-spmd"); - Bld.CreateCondBr(I->getSecond().IsInSPMDModeFlag, ExitBB, NonSPMDBB); - // There is no need to emit line number for unconditional branch. - (void)ApplyDebugLocation::CreateEmpty(CGF); - CGF.EmitBlock(NonSPMDBB); - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), - CGF.EmitCastToVoidPtr(I->getSecond().GlobalRecordAddr)); - CGF.EmitBlock(ExitBB); - } else if (!CGM.getLangOpts().OpenMPCUDATargetParallel && IsInTTDRegion) { - assert(GlobalizedRecords.back().RegionCounter > 0 && - "region counter must be > 0."); - --GlobalizedRecords.back().RegionCounter; - // Emit the restore function only in the target region. - if (GlobalizedRecords.back().RegionCounter == 0) { - QualType Int16Ty = CGM.getContext().getIntTypeForBitwidth( - /*DestWidth=*/16, /*Signed=*/0); - llvm::Value *IsInSharedMemory = CGF.EmitLoadOfScalar( - Address(GlobalizedRecords.back().UseSharedMemory, - CGM.getContext().getTypeAlignInChars(Int16Ty)), - /*Volatile=*/false, Int16Ty, GlobalizedRecords.back().Loc); - llvm::Value *Args[] = { - llvm::ConstantInt::get( - CGM.Int16Ty, - getExecutionMode() == CGOpenMPRuntimeGPU::EM_SPMD ? 
1 : 0), - IsInSharedMemory}; - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_restore_team_static_memory), - Args); - } - } else { - CGF.EmitRuntimeCall( - OMPBuilder.getOrCreateRuntimeFunction( - CGM.getModule(), OMPRTL___kmpc_data_sharing_pop_stack), - I->getSecond().GlobalRecordAddr); - } + // Deallocate the memory for each globalized value + for (auto &Rec : llvm::reverse(I->getSecond().LocalVarData)) { + I->getSecond().MappedParams->restore(CGF); + + CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction( + CGM.getModule(), OMPRTL___kmpc_free_shared), + {Rec.second.GlobalizedVal}); } } } @@ -4183,7 +3895,6 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, auto I = FunctionGlobalizedDecls.try_emplace(CGF.CurFn).first; I->getSecond().MappedParams = std::make_unique(); - I->getSecond().GlobalRecord = GlobalizedVarsRecord; I->getSecond().EscapedParameters.insert( VarChecker.getEscapedParameters().begin(), VarChecker.getEscapedParameters().end()); @@ -4192,21 +3903,16 @@ void CGOpenMPRuntimeGPU::emitFunctionProlog(CodeGenFunction &CGF, DeclToAddrMapTy &Data = I->getSecond().LocalVarData; for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { assert(VD->isCanonicalDecl() && "Expected canonical declaration"); - const FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); - Data.insert(std::make_pair(VD, MappedVarData(FD, IsInTTDRegion))); + Data.insert(std::make_pair(VD, MappedVarData())); } if (!IsInTTDRegion && !NeedToDelayGlobalization && !IsInParallelRegion) { CheckVarsEscapingDeclContext VarChecker(CGF, llvm::None); VarChecker.Visit(Body); - I->getSecond().SecondaryGlobalRecord = - VarChecker.getGlobalizedRecord(/*IsInTTDRegion=*/true); I->getSecond().SecondaryLocalVarData.emplace(); DeclToAddrMapTy &Data = I->getSecond().SecondaryLocalVarData.getValue(); for (const ValueDecl *VD : VarChecker.getEscapedDecls()) { assert(VD->isCanonicalDecl() && "Expected canonical declaration"); - const 
FieldDecl *FD = VarChecker.getFieldForGlobalizedVar(VD); - Data.insert( - std::make_pair(VD, MappedVarData(FD, /*IsInTTDRegion=*/true))); + Data.insert(std::make_pair(VD, MappedVarData())); } } if (!NeedToDelayGlobalization) { @@ -4488,6 +4194,7 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( case CudaArch::GFX1032: case CudaArch::GFX1033: case CudaArch::GFX1034: + case CudaArch::GFX1035: case CudaArch::UNUSED: case CudaArch::UNKNOWN: break; @@ -4499,187 +4206,8 @@ void CGOpenMPRuntimeGPU::processRequiresDirective( CGOpenMPRuntime::processRequiresDirective(D); } -/// Get number of SMs and number of blocks per SM. -static std::pair getSMsBlocksPerSM(CodeGenModule &CGM) { - std::pair Data; - if (CGM.getLangOpts().OpenMPCUDANumSMs) - Data.first = CGM.getLangOpts().OpenMPCUDANumSMs; - if (CGM.getLangOpts().OpenMPCUDABlocksPerSM) - Data.second = CGM.getLangOpts().OpenMPCUDABlocksPerSM; - if (Data.first && Data.second) - return Data; - switch (getCudaArch(CGM)) { - case CudaArch::SM_20: - case CudaArch::SM_21: - case CudaArch::SM_30: - case CudaArch::SM_32: - case CudaArch::SM_35: - case CudaArch::SM_37: - case CudaArch::SM_50: - case CudaArch::SM_52: - case CudaArch::SM_53: - return {16, 16}; - case CudaArch::SM_60: - case CudaArch::SM_61: - case CudaArch::SM_62: - return {56, 32}; - case CudaArch::SM_70: - case CudaArch::SM_72: - case CudaArch::SM_75: - case CudaArch::SM_80: - case CudaArch::SM_86: - return {84, 32}; - case CudaArch::GFX600: - case CudaArch::GFX601: - case CudaArch::GFX602: - case CudaArch::GFX700: - case CudaArch::GFX701: - case CudaArch::GFX702: - case CudaArch::GFX703: - case CudaArch::GFX704: - case CudaArch::GFX705: - case CudaArch::GFX801: - case CudaArch::GFX802: - case CudaArch::GFX803: - case CudaArch::GFX805: - case CudaArch::GFX810: - case CudaArch::GFX900: - case CudaArch::GFX902: - case CudaArch::GFX904: - case CudaArch::GFX906: - case CudaArch::GFX908: - case CudaArch::GFX909: - case CudaArch::GFX90a: - case CudaArch::GFX90c: - case 
CudaArch::GFX1010: - case CudaArch::GFX1011: - case CudaArch::GFX1012: - case CudaArch::GFX1013: - case CudaArch::GFX1030: - case CudaArch::GFX1031: - case CudaArch::GFX1032: - case CudaArch::GFX1033: - case CudaArch::GFX1034: - case CudaArch::UNUSED: - case CudaArch::UNKNOWN: - break; - case CudaArch::LAST: - llvm_unreachable("Unexpected Cuda arch."); - } - llvm_unreachable("Unexpected NVPTX target without ptx feature."); -} - void CGOpenMPRuntimeGPU::clear() { - if (!GlobalizedRecords.empty() && - !CGM.getLangOpts().OpenMPCUDATargetParallel) { - ASTContext &C = CGM.getContext(); - llvm::SmallVector GlobalRecs; - llvm::SmallVector SharedRecs; - RecordDecl *StaticRD = C.buildImplicitRecord( - "_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); - StaticRD->startDefinition(); - RecordDecl *SharedStaticRD = C.buildImplicitRecord( - "_shared_openmp_static_memory_type_$_", RecordDecl::TagKind::TTK_Union); - SharedStaticRD->startDefinition(); - for (const GlobalPtrSizeRecsTy &Records : GlobalizedRecords) { - if (Records.Records.empty()) - continue; - unsigned Size = 0; - unsigned RecAlignment = 0; - for (const RecordDecl *RD : Records.Records) { - QualType RDTy = C.getRecordType(RD); - unsigned Alignment = C.getTypeAlignInChars(RDTy).getQuantity(); - RecAlignment = std::max(RecAlignment, Alignment); - unsigned RecSize = C.getTypeSizeInChars(RDTy).getQuantity(); - Size = - llvm::alignTo(llvm::alignTo(Size, Alignment) + RecSize, Alignment); - } - Size = llvm::alignTo(Size, RecAlignment); - llvm::APInt ArySize(/*numBits=*/64, Size); - QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - const bool UseSharedMemory = Size <= SharedMemorySize; - auto *Field = - FieldDecl::Create(C, UseSharedMemory ? 
SharedStaticRD : StaticRD, - SourceLocation(), SourceLocation(), nullptr, SubTy, - C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - if (UseSharedMemory) { - SharedStaticRD->addDecl(Field); - SharedRecs.push_back(&Records); - } else { - StaticRD->addDecl(Field); - GlobalRecs.push_back(&Records); - } - Records.RecSize->setInitializer(llvm::ConstantInt::get(CGM.SizeTy, Size)); - Records.UseSharedMemory->setInitializer( - llvm::ConstantInt::get(CGM.Int16Ty, UseSharedMemory ? 1 : 0)); - } - // Allocate SharedMemorySize buffer for the shared memory. - // FIXME: nvlink does not handle weak linkage correctly (object with the - // different size are reported as erroneous). - // Restore this code as sson as nvlink is fixed. - if (!SharedStaticRD->field_empty()) { - llvm::APInt ArySize(/*numBits=*/64, SharedMemorySize); - QualType SubTy = C.getConstantArrayType( - C.CharTy, ArySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0); - auto *Field = FieldDecl::Create( - C, SharedStaticRD, SourceLocation(), SourceLocation(), nullptr, SubTy, - C.getTrivialTypeSourceInfo(SubTy, SourceLocation()), - /*BW=*/nullptr, /*Mutable=*/false, - /*InitStyle=*/ICIS_NoInit); - Field->setAccess(AS_public); - SharedStaticRD->addDecl(Field); - } - SharedStaticRD->completeDefinition(); - if (!SharedStaticRD->field_empty()) { - QualType StaticTy = C.getRecordType(SharedStaticRD); - llvm::Type *LLVMStaticTy = CGM.getTypes().ConvertTypeForMem(StaticTy); - auto *GV = new llvm::GlobalVariable( - CGM.getModule(), LLVMStaticTy, - /*isConstant=*/false, llvm::GlobalValue::WeakAnyLinkage, - llvm::UndefValue::get(LLVMStaticTy), - "_openmp_shared_static_glob_rd_$_", /*InsertBefore=*/nullptr, - llvm::GlobalValue::NotThreadLocal, - C.getTargetAddressSpace(LangAS::cuda_shared)); - auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - GV, CGM.VoidPtrTy); - for (const GlobalPtrSizeRecsTy 
*Rec : SharedRecs) { - Rec->Buffer->replaceAllUsesWith(Replacement); - Rec->Buffer->eraseFromParent(); - } - } - StaticRD->completeDefinition(); - if (!StaticRD->field_empty()) { - QualType StaticTy = C.getRecordType(StaticRD); - std::pair SMsBlockPerSM = getSMsBlocksPerSM(CGM); - llvm::APInt Size1(32, SMsBlockPerSM.second); - QualType Arr1Ty = - C.getConstantArrayType(StaticTy, Size1, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - llvm::APInt Size2(32, SMsBlockPerSM.first); - QualType Arr2Ty = - C.getConstantArrayType(Arr1Ty, Size2, nullptr, ArrayType::Normal, - /*IndexTypeQuals=*/0); - llvm::Type *LLVMArr2Ty = CGM.getTypes().ConvertTypeForMem(Arr2Ty); - // FIXME: nvlink does not handle weak linkage correctly (object with the - // different size are reported as erroneous). - // Restore CommonLinkage as soon as nvlink is fixed. - auto *GV = new llvm::GlobalVariable( - CGM.getModule(), LLVMArr2Ty, - /*isConstant=*/false, llvm::GlobalValue::InternalLinkage, - llvm::Constant::getNullValue(LLVMArr2Ty), - "_openmp_static_glob_rd_$_"); - auto *Replacement = llvm::ConstantExpr::getPointerBitCastOrAddrSpaceCast( - GV, CGM.VoidPtrTy); - for (const GlobalPtrSizeRecsTy *Rec : GlobalRecs) { - Rec->Buffer->replaceAllUsesWith(Replacement); - Rec->Buffer->eraseFromParent(); - } - } - } + if (!TeamsReductions.empty()) { ASTContext &C = CGM.getContext(); RecordDecl *StaticRD = C.buildImplicitRecord( diff --git a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h index 7267511ca672a..07e9225aa5713 100644 --- a/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h +++ b/clang/lib/CodeGen/CGOpenMPRuntimeGPU.h @@ -440,15 +440,9 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { /// The data for the single globalized variable. struct MappedVarData { /// Corresponding field in the global record. - const FieldDecl *FD = nullptr; + llvm::Value *GlobalizedVal = nullptr; /// Corresponding address. 
Address PrivateAddr = Address::invalid(); - /// true, if only one element is required (for latprivates in SPMD mode), - /// false, if need to create based on the warp-size. - bool IsOnePerTeam = false; - MappedVarData() = delete; - MappedVarData(const FieldDecl *FD, bool IsOnePerTeam = false) - : FD(FD), IsOnePerTeam(IsOnePerTeam) {} }; /// The map of local variables to their addresses in the global memory. using DeclToAddrMapTy = llvm::MapVector; @@ -460,29 +454,12 @@ class CGOpenMPRuntimeGPU : public CGOpenMPRuntime { EscapedParamsTy EscapedParameters; llvm::SmallVector EscapedVariableLengthDecls; llvm::SmallVector EscapedVariableLengthDeclsAddrs; - const RecordDecl *GlobalRecord = nullptr; - llvm::Optional SecondaryGlobalRecord = llvm::None; - llvm::Value *GlobalRecordAddr = nullptr; llvm::Value *IsInSPMDModeFlag = nullptr; std::unique_ptr MappedParams; }; /// Maps the function to the list of the globalized variables with their /// addresses. llvm::SmallDenseMap FunctionGlobalizedDecls; - /// List of records for the globalized variables in target/teams/distribute - /// contexts. Inner records are going to be joined into the single record, - /// while those resulting records are going to be joined into the single - /// union. This resulting union (one per CU) is the entry point for the static - /// memory management runtime functions. - struct GlobalPtrSizeRecsTy { - llvm::GlobalVariable *UseSharedMemory = nullptr; - llvm::GlobalVariable *RecSize = nullptr; - llvm::GlobalVariable *Buffer = nullptr; - SourceLocation Loc; - llvm::SmallVector Records; - unsigned RegionCounter = 0; - }; - llvm::SmallVector GlobalizedRecords; llvm::GlobalVariable *KernelTeamsReductionPtr = nullptr; /// List of the records with the list of fields for the reductions across the /// teams. 
Used to build the intermediate buffer for the fast teams diff --git a/clang/lib/CodeGen/CGStmtOpenMP.cpp b/clang/lib/CodeGen/CGStmtOpenMP.cpp index 9dd1edbfd1e56..ba497a5b9d3a7 100644 --- a/clang/lib/CodeGen/CGStmtOpenMP.cpp +++ b/clang/lib/CodeGen/CGStmtOpenMP.cpp @@ -2323,8 +2323,7 @@ void CodeGenFunction::EmitOMPLinearClause( } static void emitSimdlenSafelenClause(CodeGenFunction &CGF, - const OMPExecutableDirective &D, - bool IsMonotonic) { + const OMPExecutableDirective &D) { if (!CGF.HaveInsertPoint()) return; if (const auto *C = D.getSingleClause()) { @@ -2335,8 +2334,7 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, // In presence of finite 'safelen', it may be unsafe to mark all // the memory instructions parallel, because loop-carried // dependences of 'safelen' iterations are possible. - if (!IsMonotonic) - CGF.LoopStack.setParallel(!D.getSingleClause()); + CGF.LoopStack.setParallel(!D.getSingleClause()); } else if (const auto *C = D.getSingleClause()) { RValue Len = CGF.EmitAnyExpr(C->getSafelen(), AggValueSlot::ignored(), /*ignoreResult=*/true); @@ -2349,12 +2347,11 @@ static void emitSimdlenSafelenClause(CodeGenFunction &CGF, } } -void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D, - bool IsMonotonic) { +void CodeGenFunction::EmitOMPSimdInit(const OMPLoopDirective &D) { // Walk clauses and process safelen/lastprivate. 
- LoopStack.setParallel(!IsMonotonic); + LoopStack.setParallel(/*Enable=*/true); LoopStack.setVectorizeEnable(); - emitSimdlenSafelenClause(*this, D, IsMonotonic); + emitSimdlenSafelenClause(*this, D); if (const auto *C = D.getSingleClause()) if (C->getKind() == OMPC_ORDER_concurrent) LoopStack.setParallel(/*Enable=*/true); @@ -2677,7 +2674,7 @@ void CodeGenFunction::EmitOMPOuterLoop( if (C->getKind() == OMPC_ORDER_concurrent) CGF.LoopStack.setParallel(/*Enable=*/true); } else { - CGF.EmitOMPSimdInit(S, IsMonotonic); + CGF.EmitOMPSimdInit(S); } }, [&S, &LoopArgs, LoopExit, &CodeGenLoop, IVSize, IVSigned, &CodeGenOrdered, @@ -3187,8 +3184,7 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( isOpenMPLoopBoundSharingDirective(S.getDirectiveKind()); bool IsMonotonic = Ordered || - ((ScheduleKind.Schedule == OMPC_SCHEDULE_static || - ScheduleKind.Schedule == OMPC_SCHEDULE_unknown) && + (ScheduleKind.Schedule == OMPC_SCHEDULE_static && !(ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_nonmonotonic || ScheduleKind.M2 == OMPC_SCHEDULE_MODIFIER_nonmonotonic)) || ScheduleKind.M1 == OMPC_SCHEDULE_MODIFIER_monotonic || @@ -3201,9 +3197,9 @@ bool CodeGenFunction::EmitOMPWorksharingLoop( getJumpDestInCurrentScope(createBasicBlock("omp.loop.exit")); emitCommonSimdLoop( *this, S, - [&S, IsMonotonic](CodeGenFunction &CGF, PrePostActionTy &) { + [&S](CodeGenFunction &CGF, PrePostActionTy &) { if (isOpenMPSimdDirective(S.getDirectiveKind())) { - CGF.EmitOMPSimdInit(S, IsMonotonic); + CGF.EmitOMPSimdInit(S); } else if (const auto *C = S.getSingleClause()) { if (C->getKind() == OMPC_ORDER_concurrent) CGF.LoopStack.setParallel(/*Enable=*/true); @@ -5225,7 +5221,7 @@ void CodeGenFunction::EmitOMPDistributeLoop(const OMPLoopDirective &S, *this, S, [&S](CodeGenFunction &CGF, PrePostActionTy &) { if (isOpenMPSimdDirective(S.getDirectiveKind())) - CGF.EmitOMPSimdInit(S, /*IsMonotonic=*/true); + CGF.EmitOMPSimdInit(S); }, [&S, &LoopScope, Cond, IncExpr, LoopExit, &CodeGenLoop, 
StaticChunked](CodeGenFunction &CGF, PrePostActionTy &) { diff --git a/clang/lib/CodeGen/CodeGenAction.cpp b/clang/lib/CodeGen/CodeGenAction.cpp index 432a63dcb1054..8b3074766d8e8 100644 --- a/clang/lib/CodeGen/CodeGenAction.cpp +++ b/clang/lib/CodeGen/CodeGenAction.cpp @@ -565,7 +565,9 @@ BackendConsumer::StackSizeDiagHandler(const llvm::DiagnosticInfoStackSize &D) { // FIXME: Shouldn't need to truncate to uint32_t Diags.Report(ND->getASTContext().getFullLoc(ND->getLocation()), diag::warn_fe_frame_larger_than) - << static_cast(D.getStackSize()) << Decl::castToDeclContext(ND); + << static_cast(D.getStackSize()) + << static_cast(D.getStackLimit()) + << Decl::castToDeclContext(ND); return true; } diff --git a/clang/lib/CodeGen/CodeGenFunction.cpp b/clang/lib/CodeGen/CodeGenFunction.cpp index de46b0cf56fa7..f119084e29495 100644 --- a/clang/lib/CodeGen/CodeGenFunction.cpp +++ b/clang/lib/CodeGen/CodeGenFunction.cpp @@ -1235,6 +1235,10 @@ void CodeGenFunction::StartFunction(GlobalDecl GD, QualType RetTy, Fn->addFnAttr("packed-stack"); } + if (CGM.getCodeGenOpts().WarnStackSize != UINT_MAX) + Fn->addFnAttr("warn-stack-size", + std::to_string(CGM.getCodeGenOpts().WarnStackSize)); + if (RetTy->isVoidType()) { // Void type; nothing to return. ReturnValue = Address::invalid(); @@ -1523,11 +1527,10 @@ void CodeGenFunction::GenerateCode(GlobalDecl GD, llvm::Function *Fn, // Emit the standard function prologue. 
StartFunction(GD, ResTy, Fn, FnInfo, Args, Loc, BodyRange.getBegin()); - SyclOptReportHandler &OptReportHandler = - CGM.getDiags().getSYCLOptReportHandler(); - if (OptReportHandler.HasOptReportInfo(FD)) { + SyclOptReportHandler &SyclOptReport = CGM.getDiags().getSYCLOptReport(); + if (SyclOptReport.HasOptReportInfo(FD)) { llvm::OptimizationRemarkEmitter ORE(Fn); - for (auto ORI : llvm::enumerate(OptReportHandler.GetInfo(FD))) { + for (auto ORI : llvm::enumerate(SyclOptReport.GetInfo(FD))) { llvm::DiagnosticLocation DL = SourceLocToDebugLoc(ORI.value().KernelArgLoc); StringRef NameInDesc = ORI.value().KernelArgDescName; diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index e12f47dde285e..f1aed1c9b06b1 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -3600,7 +3600,7 @@ class CodeGenFunction : public CodeGenTypeCache { const CodeGenLoopTy &CodeGenLoop, Expr *IncExpr); /// Helpers for the OpenMP loop directives. - void EmitOMPSimdInit(const OMPLoopDirective &D, bool IsMonotonic = false); + void EmitOMPSimdInit(const OMPLoopDirective &D); void EmitOMPSimdFinal( const OMPLoopDirective &D, const llvm::function_ref CondGen); diff --git a/clang/lib/CodeGen/CodeGenModule.cpp b/clang/lib/CodeGen/CodeGenModule.cpp index e5a690facb953..34cdf31d07b4d 100644 --- a/clang/lib/CodeGen/CodeGenModule.cpp +++ b/clang/lib/CodeGen/CodeGenModule.cpp @@ -708,6 +708,13 @@ void CodeGenModule::Release() { if (LangOpts.EHAsynch) getModule().addModuleFlag(llvm::Module::Warning, "eh-asynch", 1); + // Indicate whether this Module was compiled with -fopenmp + if (getLangOpts().OpenMP && !getLangOpts().OpenMPSimd) + getModule().addModuleFlag(llvm::Module::Max, "openmp", LangOpts.OpenMP); + if (getLangOpts().OpenMPIsDevice) + getModule().addModuleFlag(llvm::Module::Max, "openmp-device", + LangOpts.OpenMP); + // Emit OpenCL specific module metadata: OpenCL/SPIR version. 
if (LangOpts.OpenCL) { EmitOpenCLMetadata(); @@ -821,8 +828,6 @@ void CodeGenModule::Release() { getCodeGenOpts().StackProtectorGuardOffset); if (getCodeGenOpts().StackAlignment) getModule().setOverrideStackAlignment(getCodeGenOpts().StackAlignment); - if (getCodeGenOpts().WarnStackSize != UINT_MAX) - getModule().setWarnStackSize(getCodeGenOpts().WarnStackSize); getTargetCodeGenInfo().emitTargetMetadata(*this, MangledDeclNames); diff --git a/clang/lib/CodeGen/TargetInfo.cpp b/clang/lib/CodeGen/TargetInfo.cpp index 7bd131704f56d..ae6ba443b0995 100644 --- a/clang/lib/CodeGen/TargetInfo.cpp +++ b/clang/lib/CodeGen/TargetInfo.cpp @@ -2610,7 +2610,7 @@ static std::string qualifyWindowsLibrary(llvm::StringRef Lib) { bool Quote = (Lib.find(' ') != StringRef::npos); std::string ArgStr = Quote ? "\"" : ""; ArgStr += Lib; - if (!Lib.endswith_lower(".lib") && !Lib.endswith_lower(".a")) + if (!Lib.endswith_insensitive(".lib") && !Lib.endswith_insensitive(".a")) ArgStr += ".lib"; ArgStr += Quote ? "\"" : ""; return ArgStr; diff --git a/clang/lib/Driver/Driver.cpp b/clang/lib/Driver/Driver.cpp index 562cb21a7a54c..992ce26be0490 100644 --- a/clang/lib/Driver/Driver.cpp +++ b/clang/lib/Driver/Driver.cpp @@ -609,9 +609,9 @@ static llvm::Triple computeTargetTriple(const Driver &D, A = Args.getLastArg(options::OPT_march_EQ); if (A && Target.isRISCV()) { StringRef ArchName = A->getValue(); - if (ArchName.startswith_lower("rv32")) + if (ArchName.startswith_insensitive("rv32")) Target.setArch(llvm::Triple::riscv32); - else if (ArchName.startswith_lower("rv64")) + else if (ArchName.startswith_insensitive("rv64")) Target.setArch(llvm::Triple::riscv64); } @@ -909,7 +909,7 @@ void Driver::CreateOffloadingDeviceToolChains(Compilation &C, if (SYCLTargetsValues) { if (SYCLTargetsValues->getNumValues()) { for (StringRef Val : SYCLTargetsValues->getValues()) { - llvm::Triple TT(Val); + llvm::Triple TT(MakeSYCLDeviceTriple(Val)); if (!isValidSYCLTriple(TT)) { 
Diag(clang::diag::err_drv_invalid_sycl_target) << Val; continue; @@ -1852,18 +1852,24 @@ void Driver::PrintHelp(bool ShowHidden) const { ExcludedFlagsBitmask |= options::FlangOnlyOption; std::string Usage = llvm::formatv("{0} [options] file...", Name).str(); - getOpts().PrintHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(), + getOpts().printHelp(llvm::outs(), Usage.c_str(), DriverTitle.c_str(), IncludedFlagsBitmask, ExcludedFlagsBitmask, /*ShowAllAliases=*/false); } llvm::Triple Driver::MakeSYCLDeviceTriple(StringRef TargetArch) const { - llvm::Triple TT; - TT.setArchName(TargetArch); - TT.setVendor(llvm::Triple::UnknownVendor); - TT.setOS(llvm::Triple::UnknownOS); - TT.setEnvironment(llvm::Triple::SYCLDevice); - return TT; + SmallVector SYCLAlias = {"spir", "spir64", "spir64_fpga", + "spir64_x86_64", "spir64_gen"}; + if (std::find(SYCLAlias.begin(), SYCLAlias.end(), TargetArch) != + SYCLAlias.end()) { + llvm::Triple TT; + TT.setArchName(TargetArch); + TT.setVendor(llvm::Triple::UnknownVendor); + TT.setOS(llvm::Triple::UnknownOS); + TT.setEnvironment(llvm::Triple::SYCLDevice); + return TT; + } + return llvm::Triple(TargetArch); } // Print the help from any of the given tools which are used for AOT @@ -2033,7 +2039,7 @@ void Driver::HandleAutocompletions(StringRef PassedFlags) const { // case-insensitive sorting for consistency with the -help option // which prints out options in the case-insensitive alphabetical order. llvm::sort(SuggestedCompletions, [](StringRef A, StringRef B) { - if (int X = A.compare_lower(B)) + if (int X = A.compare_insensitive(B)) return X < 0; return A.compare(B) > 0; }); @@ -4624,7 +4630,7 @@ class OffloadingActionBuilder final { if (SYCLTargets) { llvm::StringMap FoundNormalizedTriples; for (const char *Val : SYCLTargets->getValues()) { - llvm::Triple TT(Val); + llvm::Triple TT(C.getDriver().MakeSYCLDeviceTriple(Val)); std::string NormalizedName = TT.normalize(); // Make sure we don't have a duplicate triple. 
@@ -5116,7 +5122,8 @@ void Driver::handleArguments(Compilation &C, DerivedArgList &Args, if (Args.hasArg(options::OPT_emit_llvm)) Diag(clang::diag::err_drv_emit_llvm_link); if (IsCLMode() && LTOMode != LTOK_None && - !Args.getLastArgValue(options::OPT_fuse_ld_EQ).equals_lower("lld")) + !Args.getLastArgValue(options::OPT_fuse_ld_EQ) + .equals_insensitive("lld")) Diag(clang::diag::err_drv_lto_without_lld); } @@ -7197,7 +7204,7 @@ const ToolChain &Driver::getToolChain(const ArgList &Args, case llvm::Triple::UnknownEnvironment: case llvm::Triple::SYCLDevice: if (Args.getLastArgValue(options::OPT_fuse_ld_EQ) - .startswith_lower("bfd")) + .startswith_insensitive("bfd")) TC = std::make_unique( *this, Target, Args); else diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp index 503685ab533a0..ed8c7e94b0134 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.cpp +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.cpp @@ -185,12 +185,25 @@ getAArch64MicroArchFeaturesFromMcpu(const Driver &D, StringRef Mcpu, void aarch64::getAArch64TargetFeatures(const Driver &D, const llvm::Triple &Triple, const ArgList &Args, - std::vector &Features) { + std::vector &Features, + bool ForAS) { Arg *A; bool success = true; // Enable NEON by default. Features.push_back("+neon"); - if ((A = Args.getLastArg(options::OPT_march_EQ))) + llvm::StringRef WaMArch = ""; + if (ForAS) + for (const auto *A : + Args.filtered(options::OPT_Wa_COMMA, options::OPT_Xassembler)) + for (StringRef Value : A->getValues()) + if (Value.startswith("-march=")) + WaMArch = Value.substr(7); + // Call getAArch64ArchFeaturesFromMarch only if "-Wa,-march=" or + // "-Xassembler -march" is detected. Otherwise it may return false + // and causes Clang to error out. 
+ if (WaMArch.size()) + success = getAArch64ArchFeaturesFromMarch(D, WaMArch, Args, Features); + else if ((A = Args.getLastArg(options::OPT_march_EQ))) success = getAArch64ArchFeaturesFromMarch(D, A->getValue(), Args, Features); else if ((A = Args.getLastArg(options::OPT_mcpu_EQ))) success = getAArch64ArchFeaturesFromMcpu(D, A->getValue(), Args, Features); diff --git a/clang/lib/Driver/ToolChains/Arch/AArch64.h b/clang/lib/Driver/ToolChains/Arch/AArch64.h index 713af870d69fb..d47c402d4a42d 100644 --- a/clang/lib/Driver/ToolChains/Arch/AArch64.h +++ b/clang/lib/Driver/ToolChains/Arch/AArch64.h @@ -22,7 +22,8 @@ namespace aarch64 { void getAArch64TargetFeatures(const Driver &D, const llvm::Triple &Triple, const llvm::opt::ArgList &Args, - std::vector &Features); + std::vector &Features, + bool ForAS); std::string getAArch64TargetCPU(const llvm::opt::ArgList &Args, const llvm::Triple &Triple, llvm::opt::Arg *&A); diff --git a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp index c7f2a3ea5e023..f131d5321070c 100644 --- a/clang/lib/Driver/ToolChains/Arch/RISCV.cpp +++ b/clang/lib/Driver/ToolChains/Arch/RISCV.cpp @@ -610,17 +610,19 @@ StringRef riscv::getRISCVABI(const ArgList &Args, const llvm::Triple &Triple) { // rv64* -> lp64 StringRef MArch = getRISCVArch(Args, Triple); - if (MArch.startswith_lower("rv32")) { + if (MArch.startswith_insensitive("rv32")) { // FIXME: parse `March` to find `D` extension properly - if (MArch.substr(4).contains_lower("d") || MArch.startswith_lower("rv32g")) + if (MArch.substr(4).contains_insensitive("d") || + MArch.startswith_insensitive("rv32g")) return "ilp32d"; - else if (MArch.startswith_lower("rv32e")) + else if (MArch.startswith_insensitive("rv32e")) return "ilp32e"; else return "ilp32"; - } else if (MArch.startswith_lower("rv64")) { + } else if (MArch.startswith_insensitive("rv64")) { // FIXME: parse `March` to find `D` extension properly - if (MArch.substr(4).contains_lower("d") || 
MArch.startswith_lower("rv64g")) + if (MArch.substr(4).contains_insensitive("d") || + MArch.startswith_insensitive("rv64g")) return "lp64d"; else return "lp64"; @@ -696,11 +698,11 @@ StringRef riscv::getRISCVArch(const llvm::opt::ArgList &Args, if (const Arg *A = Args.getLastArg(options::OPT_mabi_EQ)) { StringRef MABI = A->getValue(); - if (MABI.equals_lower("ilp32e")) + if (MABI.equals_insensitive("ilp32e")) return "rv32e"; - else if (MABI.startswith_lower("ilp32")) + else if (MABI.startswith_insensitive("ilp32")) return "rv32imafdc"; - else if (MABI.startswith_lower("lp64")) + else if (MABI.startswith_insensitive("lp64")) return "rv64imafdc"; } diff --git a/clang/lib/Driver/ToolChains/Clang.cpp b/clang/lib/Driver/ToolChains/Clang.cpp index 28d0ce3a05525..41149cf07ce30 100644 --- a/clang/lib/Driver/ToolChains/Clang.cpp +++ b/clang/lib/Driver/ToolChains/Clang.cpp @@ -355,7 +355,7 @@ static void getTargetFeatures(const Driver &D, const llvm::Triple &Triple, case llvm::Triple::aarch64: case llvm::Triple::aarch64_32: case llvm::Triple::aarch64_be: - aarch64::getAArch64TargetFeatures(D, Triple, Args, Features); + aarch64::getAArch64TargetFeatures(D, Triple, Args, Features, ForAS); break; case llvm::Triple::x86: case llvm::Triple::x86_64: @@ -812,6 +812,20 @@ static void addPGOAndCoverageFlags(const ToolChain &TC, Compilation &C, PGOGenerateArg = nullptr; } + if (TC.getTriple().isOSAIX()) { + if (PGOGenerateArg) + if (!D.isUsingLTO(false /*IsDeviceOffloadAction */) || + D.getLTOMode() != LTOK_Full) + D.Diag(clang::diag::err_drv_argument_only_allowed_with) + << PGOGenerateArg->getSpelling() << "-flto"; + if (ProfileGenerateArg) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << ProfileGenerateArg->getSpelling() << TC.getTriple().str(); + if (Arg *ProfileSampleUseArg = getLastProfileSampleUseArg(Args)) + D.Diag(diag::err_drv_unsupported_opt_for_target) + << ProfileSampleUseArg->getSpelling() << TC.getTriple().str(); + } + if (ProfileGenerateArg) { if 
(ProfileGenerateArg->getOption().matches( options::OPT_fprofile_instr_generate_EQ)) @@ -2114,7 +2128,7 @@ static void SetRISCVSmallDataLimit(const ToolChain &TC, const ArgList &Args, D.Diag(diag::warn_drv_unsupported_sdata); } } else if (Args.getLastArgValue(options::OPT_mcmodel_EQ) - .equals_lower("large") && + .equals_insensitive("large") && (Triple.getArch() == llvm::Triple::riscv64)) { // Not support linker relaxation for RV64 with large code model. SmallDataLimit = "0"; @@ -3337,7 +3351,8 @@ static void RenderOpenCLOptions(const ArgList &Args, ArgStringList &CmdArgs, CmdArgs.push_back(Args.MakeArgString(A->getOption().getPrefixedName())); // Only add the default headers if we are compiling OpenCL sources. - if ((types::isOpenCL(InputType) || Args.hasArg(options::OPT_cl_std_EQ)) && + if ((types::isOpenCL(InputType) || + (Args.hasArg(options::OPT_cl_std_EQ) && types::isSrcFile(InputType))) && !Args.hasArg(options::OPT_cl_no_stdinc)) { CmdArgs.push_back("-finclude-default-header"); CmdArgs.push_back("-fdeclare-opencl-builtins"); @@ -4676,12 +4691,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (HasFPGA) CmdArgs.push_back("-fsycl-disable-range-rounding"); - // Enable generation of USM address spaces for FPGA. - // __ENABLE_USM_ADDR_SPACE__ will be used during compilation of SYCL headers - if (getToolChain().getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga) - CmdArgs.push_back("-D__ENABLE_USM_ADDR_SPACE__"); - // Add any options that are needed specific to SYCL offload while // performing the host side compilation. 
if (!IsSYCLOffloadDevice) { @@ -5224,7 +5233,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, if (A->getOption().getID() == options::OPT_mabi_EQ_vec_extabi) CmdArgs.push_back("-mabi=vec-extabi"); else - D.Diag(diag::err_aix_default_altivec_abi); + CmdArgs.push_back("-mabi=vec-default"); } if (Arg *A = Args.getLastArg(options::OPT_Wframe_larger_than_EQ)) { @@ -6134,13 +6143,6 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, options::OPT_fno_openmp_cuda_mode, /*Default=*/false)) CmdArgs.push_back("-fopenmp-cuda-mode"); - // When in OpenMP offloading mode with NVPTX target, forward - // cuda-parallel-target-regions flag - if (Args.hasFlag(options::OPT_fopenmp_cuda_parallel_target_regions, - options::OPT_fno_openmp_cuda_parallel_target_regions, - /*Default=*/true)) - CmdArgs.push_back("-fopenmp-cuda-parallel-target-regions"); - // When in OpenMP offloading mode with NVPTX target, check if full runtime // is required. if (Args.hasFlag(options::OPT_fopenmp_cuda_force_full_runtime, @@ -6756,7 +6758,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // -finput_charset=UTF-8 is default. Reject others if (Arg *inputCharset = Args.getLastArg(options::OPT_finput_charset_EQ)) { StringRef value = inputCharset->getValue(); - if (!value.equals_lower("utf-8")) + if (!value.equals_insensitive("utf-8")) D.Diag(diag::err_drv_invalid_value) << inputCharset->getAsString(Args) << value; } @@ -6764,7 +6766,7 @@ void Clang::ConstructJob(Compilation &C, const JobAction &JA, // -fexec_charset=UTF-8 is default. 
Reject others if (Arg *execCharset = Args.getLastArg(options::OPT_fexec_charset_EQ)) { StringRef value = execCharset->getValue(); - if (!value.equals_lower("utf-8")) + if (!value.equals_insensitive("utf-8")) D.Diag(diag::err_drv_invalid_value) << execCharset->getAsString(Args) << value; } @@ -7736,17 +7738,17 @@ void Clang::AddClangCLArgs(const ArgList &Args, types::ID InputType, StringRef GuardArgs = A->getValue(); // The only valid options are "cf", "cf,nochecks", "cf-", "ehcont" and // "ehcont-". - if (GuardArgs.equals_lower("cf")) { + if (GuardArgs.equals_insensitive("cf")) { // Emit CFG instrumentation and the table of address-taken functions. CmdArgs.push_back("-cfguard"); - } else if (GuardArgs.equals_lower("cf,nochecks")) { + } else if (GuardArgs.equals_insensitive("cf,nochecks")) { // Emit only the table of address-taken functions. CmdArgs.push_back("-cfguard-no-checks"); - } else if (GuardArgs.equals_lower("ehcont")) { + } else if (GuardArgs.equals_insensitive("ehcont")) { // Emit EH continuation table. CmdArgs.push_back("-ehcontguard"); - } else if (GuardArgs.equals_lower("cf-") || - GuardArgs.equals_lower("ehcont-")) { + } else if (GuardArgs.equals_insensitive("cf-") || + GuardArgs.equals_insensitive("ehcont-")) { // Do nothing, but we might want to emit a security warning in future. 
} else { D.Diag(diag::err_drv_invalid_value) << A->getSpelling() << GuardArgs; @@ -8606,7 +8608,11 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, TranslatorArgs.push_back(Output.getFilename()); if (getToolChain().getTriple().isSYCLDeviceEnvironment()) { TranslatorArgs.push_back("-spirv-max-version=1.3"); - TranslatorArgs.push_back("-spirv-debug-info-version=ocl-100"); + // TODO: align debug info for FPGA H/W when its SPIR-V consumer is ready + if (C.getDriver().isFPGAEmulationMode()) + TranslatorArgs.push_back("-spirv-debug-info-version=ocl-100"); + else + TranslatorArgs.push_back("-spirv-debug-info-version=legacy"); // Prevent crash in the translator if input IR contains DIExpression // operations which don't have mapping to OpenCL.DebugInfo.100 spec. TranslatorArgs.push_back("-spirv-allow-extra-diexpressions"); @@ -8637,23 +8643,14 @@ void SPIRVTranslator::ConstructJob(Compilation &C, const JobAction &JA, ",+SPV_INTEL_long_constant_composite" ",+SPV_INTEL_fpga_invocation_pipelining_attributes"; ExtArg = ExtArg + DefaultExtArg + INTELExtArg; - if (getToolChain().getTriple().getSubArch() == - llvm::Triple::SPIRSubArch_fpga) { - for (auto *A : TCArgs) { - if (A->getOption().matches(options::OPT_Xs_separate) || - A->getOption().matches(options::OPT_Xs)) { - StringRef ArgString(A->getValue()); - // Enable SPV_INTEL_usm_storage_classes only for FPGA hardware, - // since it adds new storage classes that represent global_device and - // global_host address spaces, which are not supported for all - // targets. With the extension disabled the storage classes will be - // lowered to CrossWorkgroup storage class that is mapped to just - // global address space. 
- if (ArgString == "hardware" || ArgString == "simulation") - ExtArg += ",+SPV_INTEL_usm_storage_classes"; - } - } - } + if (!C.getDriver().isFPGAEmulationMode()) + // Enable SPV_INTEL_usm_storage_classes only for FPGA hardware, + // since it adds new storage classes that represent global_device and + // global_host address spaces, which are not supported for all + // targets. With the extension disabled the storage classes will be + // lowered to CrossWorkgroup storage class that is mapped to just + // global address space. + ExtArg += ",+SPV_INTEL_usm_storage_classes"; TranslatorArgs.push_back(TCArgs.MakeArgString(ExtArg)); } for (auto I : Inputs) { diff --git a/clang/lib/Driver/ToolChains/FreeBSD.cpp b/clang/lib/Driver/ToolChains/FreeBSD.cpp index f8c6a81bf3bc0..5dcf74dabf4fc 100644 --- a/clang/lib/Driver/ToolChains/FreeBSD.cpp +++ b/clang/lib/Driver/ToolChains/FreeBSD.cpp @@ -293,6 +293,8 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, addLinkerCompressDebugSectionsOption(ToolChain, Args, CmdArgs); AddLinkerInputs(ToolChain, Inputs, Args, CmdArgs, JA); + bool Profiling = Args.hasArg(options::OPT_pg) && + ToolChain.getTriple().getOSMajorVersion() < 14; if (!Args.hasArg(options::OPT_nostdlib, options::OPT_nodefaultlibs)) { // Use the static OpenMP runtime with -static-openmp bool StaticOpenMP = Args.hasArg(options::OPT_static_openmp) && @@ -302,7 +304,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (D.CCCIsCXX()) { if (ToolChain.ShouldLinkCXXStdlib(Args)) ToolChain.AddCXXStdlibLibArgs(Args, CmdArgs); - if (Args.hasArg(options::OPT_pg)) + if (Profiling) CmdArgs.push_back("-lm_p"); else CmdArgs.push_back("-lm"); @@ -313,13 +315,13 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, linkXRayRuntimeDeps(ToolChain, CmdArgs); // FIXME: For some reason GCC passes -lgcc and -lgcc_s before adding // the default system libraries. Just mimic this for now. 
- if (Args.hasArg(options::OPT_pg)) + if (Profiling) CmdArgs.push_back("-lgcc_p"); else CmdArgs.push_back("-lgcc"); if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lgcc_eh"); - } else if (Args.hasArg(options::OPT_pg)) { + } else if (Profiling) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); @@ -328,13 +330,13 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, } if (Args.hasArg(options::OPT_pthread)) { - if (Args.hasArg(options::OPT_pg)) + if (Profiling) CmdArgs.push_back("-lpthread_p"); else CmdArgs.push_back("-lpthread"); } - if (Args.hasArg(options::OPT_pg)) { + if (Profiling) { if (Args.hasArg(options::OPT_shared)) CmdArgs.push_back("-lc"); else @@ -347,7 +349,7 @@ void freebsd::Linker::ConstructJob(Compilation &C, const JobAction &JA, if (Args.hasArg(options::OPT_static)) { CmdArgs.push_back("-lgcc_eh"); - } else if (Args.hasArg(options::OPT_pg)) { + } else if (Profiling) { CmdArgs.push_back("-lgcc_eh_p"); } else { CmdArgs.push_back("--as-needed"); @@ -416,7 +418,8 @@ void FreeBSD::addLibStdCxxIncludePaths( void FreeBSD::AddCXXStdlibLibArgs(const ArgList &Args, ArgStringList &CmdArgs) const { CXXStdlibType Type = GetCXXStdlibType(Args); - bool Profiling = Args.hasArg(options::OPT_pg); + bool Profiling = + Args.hasArg(options::OPT_pg) && getTriple().getOSMajorVersion() < 14; switch (Type) { case ToolChain::CST_Libcxx: diff --git a/clang/lib/Driver/ToolChains/Fuchsia.cpp b/clang/lib/Driver/ToolChains/Fuchsia.cpp index 502afdc1e30c9..fd9804a7f3532 100644 --- a/clang/lib/Driver/ToolChains/Fuchsia.cpp +++ b/clang/lib/Driver/ToolChains/Fuchsia.cpp @@ -54,8 +54,8 @@ void fuchsia::Linker::ConstructJob(Compilation &C, const JobAction &JA, CmdArgs.push_back("now"); const char *Exec = Args.MakeArgString(ToolChain.GetLinkerPath()); - if (llvm::sys::path::filename(Exec).equals_lower("ld.lld") || - llvm::sys::path::stem(Exec).equals_lower("ld.lld")) { + if 
(llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") || + llvm::sys::path::stem(Exec).equals_insensitive("ld.lld")) { CmdArgs.push_back("-z"); CmdArgs.push_back("rodynamic"); CmdArgs.push_back("-z"); diff --git a/clang/lib/Driver/ToolChains/Gnu.cpp b/clang/lib/Driver/ToolChains/Gnu.cpp index 45e0029ef4b89..6ca974b83b3ee 100644 --- a/clang/lib/Driver/ToolChains/Gnu.cpp +++ b/clang/lib/Driver/ToolChains/Gnu.cpp @@ -51,9 +51,9 @@ static void normalizeCPUNamesForAssembler(const ArgList &Args, ArgStringList &CmdArgs) { if (Arg *A = Args.getLastArg(options::OPT_mcpu_EQ)) { StringRef CPUArg(A->getValue()); - if (CPUArg.equals_lower("krait")) + if (CPUArg.equals_insensitive("krait")) CmdArgs.push_back("-mcpu=cortex-a15"); - else if(CPUArg.equals_lower("kryo")) + else if (CPUArg.equals_insensitive("kryo")) CmdArgs.push_back("-mcpu=cortex-a57"); else Args.AddLastArg(CmdArgs, options::OPT_mcpu_EQ); diff --git a/clang/lib/Driver/ToolChains/Hexagon.cpp b/clang/lib/Driver/ToolChains/Hexagon.cpp index e58b666dbfc00..df97da5a20d6d 100644 --- a/clang/lib/Driver/ToolChains/Hexagon.cpp +++ b/clang/lib/Driver/ToolChains/Hexagon.cpp @@ -38,7 +38,7 @@ static void handleHVXWarnings(const Driver &D, const ArgList &Args) { // Handle the unsupported values passed to mhvx-length. 
if (Arg *A = Args.getLastArg(options::OPT_mhexagon_hvx_length_EQ)) { StringRef Val = A->getValue(); - if (!Val.equals_lower("64b") && !Val.equals_lower("128b")) + if (!Val.equals_insensitive("64b") && !Val.equals_insensitive("128b")) D.Diag(diag::err_drv_unsupported_option_argument) << A->getOption().getName() << Val; } @@ -218,8 +218,8 @@ constructHexagonLinkArgs(Compilation &C, const JobAction &JA, bool IncDefLibs = !Args.hasArg(options::OPT_nodefaultlibs); bool UseG0 = false; const char *Exec = Args.MakeArgString(HTC.GetLinkerPath()); - bool UseLLD = (llvm::sys::path::filename(Exec).equals_lower("ld.lld") || - llvm::sys::path::stem(Exec).equals_lower("ld.lld")); + bool UseLLD = (llvm::sys::path::filename(Exec).equals_insensitive("ld.lld") || + llvm::sys::path::stem(Exec).equals_insensitive("ld.lld")); bool UseShared = IsShared && !IsStatic; StringRef CpuVer = toolchains::HexagonToolChain::GetTargetCPUVersion(Args); diff --git a/clang/lib/Driver/ToolChains/MSVC.cpp b/clang/lib/Driver/ToolChains/MSVC.cpp index 6174cbc8d5134..a7956965f84a8 100644 --- a/clang/lib/Driver/ToolChains/MSVC.cpp +++ b/clang/lib/Driver/ToolChains/MSVC.cpp @@ -181,24 +181,25 @@ findVCToolChainViaEnvironment(llvm::vfs::FileSystem &VFS, std::string &Path, // whatever/VC/bin --> old toolchain, VC dir is toolchain dir. llvm::StringRef TestPath = PathEntry; - bool IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin"); + bool IsBin = + llvm::sys::path::filename(TestPath).equals_insensitive("bin"); if (!IsBin) { // Strip any architecture subdir like "amd64". 
TestPath = llvm::sys::path::parent_path(TestPath); - IsBin = llvm::sys::path::filename(TestPath).equals_lower("bin"); + IsBin = llvm::sys::path::filename(TestPath).equals_insensitive("bin"); } if (IsBin) { llvm::StringRef ParentPath = llvm::sys::path::parent_path(TestPath); llvm::StringRef ParentFilename = llvm::sys::path::filename(ParentPath); - if (ParentFilename.equals_lower("VC")) { + if (ParentFilename.equals_insensitive("VC")) { Path = std::string(ParentPath); VSLayout = MSVCToolChain::ToolsetLayout::OlderVS; return true; } - if (ParentFilename.equals_lower("x86ret") || - ParentFilename.equals_lower("x86chk") || - ParentFilename.equals_lower("amd64ret") || - ParentFilename.equals_lower("amd64chk")) { + if (ParentFilename.equals_insensitive("x86ret") || + ParentFilename.equals_insensitive("x86chk") || + ParentFilename.equals_insensitive("amd64ret") || + ParentFilename.equals_insensitive("amd64chk")) { Path = std::string(ParentPath); VSLayout = MSVCToolChain::ToolsetLayout::DevDivInternal; return true; @@ -217,7 +218,7 @@ findVCToolChainViaEnvironment(llvm::vfs::FileSystem &VFS, std::string &Path, for (llvm::StringRef Prefix : ExpectedPrefixes) { if (It == End) goto NotAToolChain; - if (!It->startswith_lower(Prefix)) + if (!It->startswith_insensitive(Prefix)) goto NotAToolChain; ++It; } @@ -573,14 +574,15 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // Control Flow Guard checks if (Arg *A = Args.getLastArg(options::OPT__SLASH_guard)) { StringRef GuardArgs = A->getValue(); - if (GuardArgs.equals_lower("cf") || GuardArgs.equals_lower("cf,nochecks")) { + if (GuardArgs.equals_insensitive("cf") || + GuardArgs.equals_insensitive("cf,nochecks")) { // MSVC doesn't yet support the "nochecks" modifier. 
CmdArgs.push_back("-guard:cf"); - } else if (GuardArgs.equals_lower("cf-")) { + } else if (GuardArgs.equals_insensitive("cf-")) { CmdArgs.push_back("-guard:cf-"); - } else if (GuardArgs.equals_lower("ehcont")) { + } else if (GuardArgs.equals_insensitive("ehcont")) { CmdArgs.push_back("-guard:ehcont"); - } else if (GuardArgs.equals_lower("ehcont-")) { + } else if (GuardArgs.equals_insensitive("ehcont-")) { CmdArgs.push_back("-guard:ehcont-"); } } @@ -657,10 +659,10 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER); if (Linker.empty()) Linker = "link"; - if (Linker.equals_lower("lld")) + if (Linker.equals_insensitive("lld")) Linker = "lld-link"; - if (Linker.equals_lower("link")) { + if (Linker.equals_insensitive("link")) { // If we're using the MSVC linker, it's not sufficient to just use link // from the program PATH, because other environments like GnuWin32 install // their own link.exe which may come first. @@ -719,7 +721,7 @@ void visualstudio::Linker::ConstructJob(Compilation &C, const JobAction &JA, // find it. 
for (const char *Cursor = EnvBlock.data(); *Cursor != '\0';) { llvm::StringRef EnvVar(Cursor); - if (EnvVar.startswith_lower("path=")) { + if (EnvVar.startswith_insensitive("path=")) { using SubDirectoryType = toolchains::MSVCToolChain::SubDirectoryType; constexpr size_t PrefixLen = 5; // strlen("path=") Environment.push_back(Args.MakeArgString( @@ -1316,23 +1318,35 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, for (const auto &Path : DriverArgs.getAllArgValues(options::OPT__SLASH_imsvc)) addSystemInclude(DriverArgs, CC1Args, Path); + auto AddSystemIncludesFromEnv = [&](StringRef Var) -> bool { + if (auto Val = llvm::sys::Process::GetEnv(Var)) { + SmallVector Dirs; + StringRef(*Val).split(Dirs, ";", /*MaxSplit=*/-1, /*KeepEmpty=*/false); + if (!Dirs.empty()) { + addSystemIncludes(DriverArgs, CC1Args, Dirs); + return true; + } + } + return false; + }; + + // Add %INCLUDE%-like dirs via /external:env: flags. + for (const auto &Var : + DriverArgs.getAllArgValues(options::OPT__SLASH_external_env)) { + AddSystemIncludesFromEnv(Var); + } + if (DriverArgs.hasArg(options::OPT_nostdlibinc)) return; - // Honor %INCLUDE%. It should know essential search paths with vcvarsall.bat. - // Skip if the user expressly set a vctoolsdir + // Honor %INCLUDE% and %EXTERNAL_INCLUDE%. It should have essential search + // paths set by vcvarsall.bat. Skip if the user expressly set a vctoolsdir. 
if (!DriverArgs.getLastArg(options::OPT__SLASH_vctoolsdir, options::OPT__SLASH_winsysroot)) { - if (llvm::Optional cl_include_dir = - llvm::sys::Process::GetEnv("INCLUDE")) { - SmallVector Dirs; - StringRef(*cl_include_dir) - .split(Dirs, ";", /*MaxSplit=*/-1, /*KeepEmpty=*/false); - for (StringRef Dir : Dirs) - addSystemInclude(DriverArgs, CC1Args, Dir); - if (!Dirs.empty()) - return; - } + bool Found = AddSystemIncludesFromEnv("INCLUDE"); + Found |= AddSystemIncludesFromEnv("EXTERNAL_INCLUDE"); + if (Found) + return; } // When built with access to the proper Windows APIs, try to actually find @@ -1354,7 +1368,7 @@ void MSVCToolChain::AddClangSystemIncludeArgs(const ArgList &DriverArgs, } std::string WindowsSDKDir; - int major; + int major = 0; std::string windowsSDKIncludeVersion; std::string windowsSDKLibVersion; if (getWindowsSDKDir(getVFS(), DriverArgs, WindowsSDKDir, major, diff --git a/clang/lib/Driver/ToolChains/MinGW.cpp b/clang/lib/Driver/ToolChains/MinGW.cpp index 4ebcdf6a94455..467a5479bfb74 100644 --- a/clang/lib/Driver/ToolChains/MinGW.cpp +++ b/clang/lib/Driver/ToolChains/MinGW.cpp @@ -427,7 +427,7 @@ toolchains::MinGW::MinGW(const Driver &D, const llvm::Triple &Triple, NativeLLVMSupport = Args.getLastArgValue(options::OPT_fuse_ld_EQ, CLANG_DEFAULT_LINKER) - .equals_lower("lld"); + .equals_insensitive("lld"); } bool toolchains::MinGW::IsIntegratedAssemblerDefault() const { return true; } diff --git a/clang/lib/Driver/ToolChains/SYCL.cpp b/clang/lib/Driver/ToolChains/SYCL.cpp index d74572e2b8226..dc92efdeb5d16 100644 --- a/clang/lib/Driver/ToolChains/SYCL.cpp +++ b/clang/lib/Driver/ToolChains/SYCL.cpp @@ -66,7 +66,10 @@ const char *SYCL::Linker::constructLLVMSpirvCommand( } else { CmdArgs.push_back("-spirv-max-version=1.3"); CmdArgs.push_back("-spirv-ext=+all"); - CmdArgs.push_back("-spirv-debug-info-version=ocl-100"); + if (!C.getDriver().isFPGAEmulationMode()) + CmdArgs.push_back("-spirv-debug-info-version=legacy"); + else + 
CmdArgs.push_back("-spirv-debug-info-version=ocl-100"); CmdArgs.push_back("-spirv-allow-extra-diexpressions"); CmdArgs.push_back("-spirv-allow-unknown-intrinsics=llvm.genx."); CmdArgs.push_back("-o"); @@ -677,7 +680,7 @@ void SYCLToolChain::TranslateTargetOpt(const llvm::opt::ArgList &Args, OptNoTriple = A->getOption().matches(Opt); if (A->getOption().matches(Opt_EQ)) { // Passing device args: -X= -opt=val. - if (A->getValue() != getTripleString()) + if (getDriver().MakeSYCLDeviceTriple(A->getValue()) != getTriple()) // Provided triple does not match current tool chain. continue; } else if (!OptNoTriple) diff --git a/clang/lib/Format/ContinuationIndenter.cpp b/clang/lib/Format/ContinuationIndenter.cpp index cbf016f4b1666..8fbc15f27922a 100644 --- a/clang/lib/Format/ContinuationIndenter.cpp +++ b/clang/lib/Format/ContinuationIndenter.cpp @@ -1906,12 +1906,12 @@ ContinuationIndenter::createBreakableToken(const FormatToken &Current, LineState &State, bool AllowBreak) { unsigned StartColumn = State.Column - Current.ColumnWidth; if (Current.isStringLiteral()) { - // FIXME: String literal breaking is currently disabled for C#, Java and - // JavaScript, as it requires strings to be merged using "+" which we + // FIXME: String literal breaking is currently disabled for C#, Java, Json + // and JavaScript, as it requires strings to be merged using "+" which we // don't support. 
if (Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript || Style.isCSharp() || - !Style.BreakStringLiterals || !AllowBreak) + Style.isJson() || !Style.BreakStringLiterals || !AllowBreak) return nullptr; // Don't break string literals inside preprocessor directives (except for diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index 53cbbf66e85ab..2b860d2a25f7b 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -63,6 +63,7 @@ template <> struct ScalarEnumerationTraits { IO.enumCase(Value, "TableGen", FormatStyle::LK_TableGen); IO.enumCase(Value, "TextProto", FormatStyle::LK_TextProto); IO.enumCase(Value, "CSharp", FormatStyle::LK_CSharp); + IO.enumCase(Value, "Json", FormatStyle::LK_Json); } }; @@ -85,6 +86,15 @@ template <> struct ScalarEnumerationTraits { } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, + FormatStyle::LambdaBodyIndentationKind &Value) { + IO.enumCase(Value, "Signature", FormatStyle::LBI_Signature); + IO.enumCase(Value, "OuterScope", FormatStyle::LBI_OuterScope); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, FormatStyle::UseTabStyle &Value) { IO.enumCase(Value, "Never", FormatStyle::UT_Never); @@ -408,6 +418,16 @@ struct ScalarEnumerationTraits { } }; +template <> +struct ScalarEnumerationTraits { + static void enumeration(IO &IO, FormatStyle::ReferenceAlignmentStyle &Value) { + IO.enumCase(Value, "Pointer", FormatStyle::RAS_Pointer); + IO.enumCase(Value, "Middle", FormatStyle::RAS_Middle); + IO.enumCase(Value, "Left", FormatStyle::RAS_Left); + IO.enumCase(Value, "Right", FormatStyle::RAS_Right); + } +}; + template <> struct ScalarEnumerationTraits { static void enumeration(IO &IO, @@ -415,8 +435,8 @@ struct ScalarEnumerationTraits { IO.enumCase(Value, "Never", FormatStyle::SBPO_Never); IO.enumCase(Value, "ControlStatements", FormatStyle::SBPO_ControlStatements); - IO.enumCase(Value, 
"ControlStatementsExceptForEachMacros", - FormatStyle::SBPO_ControlStatementsExceptForEachMacros); + IO.enumCase(Value, "ControlStatementsExceptControlMacros", + FormatStyle::SBPO_ControlStatementsExceptControlMacros); IO.enumCase(Value, "NonEmptyParentheses", FormatStyle::SBPO_NonEmptyParentheses); IO.enumCase(Value, "Always", FormatStyle::SBPO_Always); @@ -424,6 +444,8 @@ struct ScalarEnumerationTraits { // For backward compatibility. IO.enumCase(Value, "false", FormatStyle::SBPO_Never); IO.enumCase(Value, "true", FormatStyle::SBPO_ControlStatements); + IO.enumCase(Value, "ControlStatementsExceptForEachMacros", + FormatStyle::SBPO_ControlStatementsExceptControlMacros); } }; @@ -628,6 +650,8 @@ template <> struct MappingTraits { Style.ExperimentalAutoDetectBinPacking); IO.mapOptional("FixNamespaceComments", Style.FixNamespaceComments); IO.mapOptional("ForEachMacros", Style.ForEachMacros); + IO.mapOptional("IfMacros", Style.IfMacros); + IO.mapOptional("IncludeBlocks", Style.IncludeStyle.IncludeBlocks); IO.mapOptional("IncludeCategories", Style.IncludeStyle.IncludeCategories); IO.mapOptional("IncludeIsMainRegex", Style.IncludeStyle.IncludeIsMainRegex); @@ -649,6 +673,7 @@ template <> struct MappingTraits { IO.mapOptional("JavaScriptWrapImports", Style.JavaScriptWrapImports); IO.mapOptional("KeepEmptyLinesAtTheStartOfBlocks", Style.KeepEmptyLinesAtTheStartOfBlocks); + IO.mapOptional("LambdaBodyIndentation", Style.LambdaBodyIndentation); IO.mapOptional("MacroBlockBegin", Style.MacroBlockBegin); IO.mapOptional("MacroBlockEnd", Style.MacroBlockEnd); IO.mapOptional("MaxEmptyLinesToKeep", Style.MaxEmptyLinesToKeep); @@ -678,6 +703,7 @@ template <> struct MappingTraits { IO.mapOptional("PointerAlignment", Style.PointerAlignment); IO.mapOptional("PPIndentWidth", Style.PPIndentWidth); IO.mapOptional("RawStringFormats", Style.RawStringFormats); + IO.mapOptional("ReferenceAlignment", Style.ReferenceAlignment); IO.mapOptional("ReflowComments", Style.ReflowComments); 
IO.mapOptional("ShortNamespaceLines", Style.ShortNamespaceLines); IO.mapOptional("SortIncludes", Style.SortIncludes); @@ -1021,6 +1047,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.ForEachMacros.push_back("foreach"); LLVMStyle.ForEachMacros.push_back("Q_FOREACH"); LLVMStyle.ForEachMacros.push_back("BOOST_FOREACH"); + LLVMStyle.IfMacros.push_back("KJ_IF_MAYBE"); LLVMStyle.IncludeStyle.IncludeCategories = { {"^\"(llvm|llvm-c|clang|clang-c)/", 2, 0, false}, {"^(<|\"(gtest|gmock|isl|json)/)", 3, 0, false}, @@ -1040,6 +1067,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.JavaScriptQuotes = FormatStyle::JSQS_Leave; LLVMStyle.JavaScriptWrapImports = true; LLVMStyle.TabWidth = 8; + LLVMStyle.LambdaBodyIndentation = FormatStyle::LBI_Signature; LLVMStyle.MaxEmptyLinesToKeep = 1; LLVMStyle.KeepEmptyLinesAtTheStartOfBlocks = true; LLVMStyle.NamespaceIndentation = FormatStyle::NI_None; @@ -1049,6 +1077,7 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { LLVMStyle.ObjCSpaceAfterProperty = false; LLVMStyle.ObjCSpaceBeforeProtocolList = true; LLVMStyle.PointerAlignment = FormatStyle::PAS_Right; + LLVMStyle.ReferenceAlignment = FormatStyle::RAS_Pointer; LLVMStyle.ShortNamespaceLines = 1; LLVMStyle.SpacesBeforeTrailingComments = 1; LLVMStyle.Standard = FormatStyle::LS_Latest; @@ -1105,6 +1134,9 @@ FormatStyle getLLVMStyle(FormatStyle::LanguageKind Language) { if (Language == FormatStyle::LK_TableGen) { LLVMStyle.SpacesInContainerLiterals = false; } + if (LLVMStyle.isJson()) { + LLVMStyle.ColumnLimit = 0; + } return LLVMStyle; } @@ -1413,23 +1445,23 @@ FormatStyle getNoStyle() { bool getPredefinedStyle(StringRef Name, FormatStyle::LanguageKind Language, FormatStyle *Style) { - if (Name.equals_lower("llvm")) { + if (Name.equals_insensitive("llvm")) { *Style = getLLVMStyle(Language); - } else if (Name.equals_lower("chromium")) { + } else if (Name.equals_insensitive("chromium")) { *Style = 
getChromiumStyle(Language); - } else if (Name.equals_lower("mozilla")) { + } else if (Name.equals_insensitive("mozilla")) { *Style = getMozillaStyle(); - } else if (Name.equals_lower("google")) { + } else if (Name.equals_insensitive("google")) { *Style = getGoogleStyle(Language); - } else if (Name.equals_lower("webkit")) { + } else if (Name.equals_insensitive("webkit")) { *Style = getWebKitStyle(); - } else if (Name.equals_lower("gnu")) { + } else if (Name.equals_insensitive("gnu")) { *Style = getGNUStyle(); - } else if (Name.equals_lower("microsoft")) { + } else if (Name.equals_insensitive("microsoft")) { *Style = getMicrosoftStyle(Language); - } else if (Name.equals_lower("none")) { + } else if (Name.equals_insensitive("none")) { *Style = getNoStyle(); - } else if (Name.equals_lower("inheritparentconfig")) { + } else if (Name.equals_insensitive("inheritparentconfig")) { Style->InheritsParentConfig = true; } else { return false; @@ -1739,10 +1771,12 @@ class Formatter : public TokenAnalyzer { Tok = Tok->Next; } } - if (Style.DerivePointerAlignment) + if (Style.DerivePointerAlignment) { Style.PointerAlignment = countVariableAlignments(AnnotatedLines) <= 0 ? FormatStyle::PAS_Left : FormatStyle::PAS_Right; + Style.ReferenceAlignment = FormatStyle::RAS_Pointer; + } if (Style.Standard == FormatStyle::LS_Auto) Style.Standard = hasCpp03IncompatibleFormat(AnnotatedLines) ? FormatStyle::LS_Latest @@ -2795,6 +2829,25 @@ reformat(const FormatStyle &Style, StringRef Code, if (Expanded.Language == FormatStyle::LK_JavaScript && isMpegTS(Code)) return {tooling::Replacements(), 0}; + // JSON only needs the formatting pass. + if (Style.isJson()) { + std::vector Ranges(1, tooling::Range(0, Code.size())); + auto Env = + std::make_unique(Code, FileName, Ranges, FirstStartColumn, + NextStartColumn, LastStartColumn); + // Perform the actual formatting pass.
+ tooling::Replacements Replaces = + Formatter(*Env, Style, Status).process().first; + // add a replacement to remove the "x = " from the result. + if (!Replaces.add(tooling::Replacement(FileName, 0, 4, ""))) { + // apply the reformatting changes and the removal of "x = ". + if (applyAllReplacements(Code, Replaces)) { + return {Replaces, 0}; + } + } + return {tooling::Replacements(), 0}; + } + typedef std::function( const Environment &)> AnalyzerPass; @@ -2943,23 +2996,26 @@ const char *StyleOptionHelpDescription = static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { if (FileName.endswith(".java")) return FormatStyle::LK_Java; - if (FileName.endswith_lower(".js") || FileName.endswith_lower(".mjs") || - FileName.endswith_lower(".ts")) + if (FileName.endswith_insensitive(".js") || + FileName.endswith_insensitive(".mjs") || + FileName.endswith_insensitive(".ts")) return FormatStyle::LK_JavaScript; // (module) JavaScript or TypeScript. if (FileName.endswith(".m") || FileName.endswith(".mm")) return FormatStyle::LK_ObjC; - if (FileName.endswith_lower(".proto") || - FileName.endswith_lower(".protodevel")) + if (FileName.endswith_insensitive(".proto") || + FileName.endswith_insensitive(".protodevel")) return FormatStyle::LK_Proto; - if (FileName.endswith_lower(".textpb") || - FileName.endswith_lower(".pb.txt") || - FileName.endswith_lower(".textproto") || - FileName.endswith_lower(".asciipb")) + if (FileName.endswith_insensitive(".textpb") || + FileName.endswith_insensitive(".pb.txt") || + FileName.endswith_insensitive(".textproto") || + FileName.endswith_insensitive(".asciipb")) return FormatStyle::LK_TextProto; - if (FileName.endswith_lower(".td")) + if (FileName.endswith_insensitive(".td")) return FormatStyle::LK_TableGen; - if (FileName.endswith_lower(".cs")) + if (FileName.endswith_insensitive(".cs")) return FormatStyle::LK_CSharp; + if (FileName.endswith_insensitive(".json")) + return FormatStyle::LK_Json; return FormatStyle::LK_Cpp; } @@ 
-3018,7 +3074,7 @@ llvm::Expected getStyle(StringRef StyleName, StringRef FileName, // If the style inherits the parent configuration it is a command line // configuration, which wants to inherit, so we have to skip the check of the // StyleName. - if (!Style.InheritsParentConfig && !StyleName.equals_lower("file")) { + if (!Style.InheritsParentConfig && !StyleName.equals_insensitive("file")) { if (!getPredefinedStyle(StyleName, Style.Language, &Style)) return make_string_error("Invalid value for -style"); if (!Style.InheritsParentConfig) diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 57c5eeb5a091f..0506cd554bcba 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -52,6 +52,7 @@ namespace format { TYPE(FunctionDeclarationName) \ TYPE(FunctionLBrace) \ TYPE(FunctionTypeLParen) \ + TYPE(IfMacro) \ TYPE(ImplicitStringLiteral) \ TYPE(InheritanceColon) \ TYPE(InheritanceComma) \ diff --git a/clang/lib/Format/FormatTokenLexer.cpp b/clang/lib/Format/FormatTokenLexer.cpp index 0faa18c348289..a9cfb4a247f09 100644 --- a/clang/lib/Format/FormatTokenLexer.cpp +++ b/clang/lib/Format/FormatTokenLexer.cpp @@ -39,6 +39,8 @@ FormatTokenLexer::FormatTokenLexer( for (const std::string &ForEachMacro : Style.ForEachMacros) Macros.insert({&IdentTable.get(ForEachMacro), TT_ForEachMacro}); + for (const std::string &IfMacro : Style.IfMacros) + Macros.insert({&IdentTable.get(IfMacro), TT_IfMacro}); for (const std::string &AttributeMacro : Style.AttributeMacros) Macros.insert({&IdentTable.get(AttributeMacro), TT_AttributeMacro}); for (const std::string &StatementMacro : Style.StatementMacros) @@ -1014,6 +1016,13 @@ FormatToken *FormatTokenLexer::getNextToken() { tok::pp_define) && it != Macros.end()) { FormatTok->setType(it->second); + if (it->second == TT_IfMacro) { + // The lexer token currently has type tok::kw_unknown. 
However, for this + // substitution to be treated correctly in the TokenAnnotator, faking + // the tok value seems to be needed. Not sure if there's a more elegant + // way. + FormatTok->Tok.setKind(tok::kw_if); + } } else if (FormatTok->is(tok::identifier)) { if (MacroBlockBeginRegex.match(Text)) { FormatTok->setType(TT_MacroBlockBegin); diff --git a/clang/lib/Format/SortJavaScriptImports.cpp b/clang/lib/Format/SortJavaScriptImports.cpp index 901204c297f9d..a5e3ce69207bd 100644 --- a/clang/lib/Format/SortJavaScriptImports.cpp +++ b/clang/lib/Format/SortJavaScriptImports.cpp @@ -113,7 +113,7 @@ bool operator<(const JsModuleReference &LHS, const JsModuleReference &RHS) { // Empty URLs sort *last* (for export {...};). if (LHS.URL.empty() != RHS.URL.empty()) return LHS.URL.empty() < RHS.URL.empty(); - if (int Res = LHS.URL.compare_lower(RHS.URL)) + if (int Res = LHS.URL.compare_insensitive(RHS.URL)) return Res < 0; // '*' imports (with prefix) sort before {a, b, ...} imports. if (LHS.Prefix.empty() != RHS.Prefix.empty()) @@ -327,7 +327,7 @@ class JavaScriptImportSorter : public TokenAnalyzer { SmallVector Symbols = Reference.Symbols; llvm::stable_sort( Symbols, [&](const JsImportedSymbol &LHS, const JsImportedSymbol &RHS) { - return LHS.Symbol.compare_lower(RHS.Symbol) < 0; + return LHS.Symbol.compare_insensitive(RHS.Symbol) < 0; }); if (!Reference.SymbolsMerged && Symbols == Reference.Symbols) { // Symbols didn't change, just emit the entire module reference. diff --git a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp old mode 100755 new mode 100644 index 48309af24aa81..aa69ff88bd747 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -1409,7 +1409,7 @@ class AnnotatingParser { // Reset token type in case we have already looked at it and then // recovered from an error (e.g. failure to find the matching >). 
if (!CurrentToken->isOneOf( - TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, + TT_LambdaLSquare, TT_LambdaLBrace, TT_AttributeMacro, TT_IfMacro, TT_ForEachMacro, TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral, TT_InlineASMBrace, TT_FatArrow, TT_LambdaArrow, TT_NamespaceMacro, TT_OverloadedOperator, @@ -2900,6 +2900,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, const FormatToken &Right) { if (Left.is(tok::kw_return) && Right.isNot(tok::semi)) return true; + if (Style.isJson() && Left.is(tok::string_literal) && Right.is(tok::colon)) + return false; if (Left.is(Keywords.kw_assert) && Style.Language == FormatStyle::LK_Java) return true; if (Style.ObjCSpaceAfterProperty && Line.Type == LT_ObjCProperty && @@ -2988,16 +2990,17 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, Style.SpaceAroundPointerQualifiers == FormatStyle::SAPQ_Both) && (Left.is(TT_AttributeParen) || Left.canBePointerOrReferenceQualifier())) return true; - return (Left.Tok.isLiteral() || - (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && - (Style.PointerAlignment != FormatStyle::PAS_Left || - (Line.IsMultiVariableDeclStmt && - (Left.NestingLevel == 0 || - (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); + return ( + Left.Tok.isLiteral() || + (!Left.isOneOf(TT_PointerOrReference, tok::l_paren) && + (getTokenPointerOrReferenceAlignment(Right) != FormatStyle::PAS_Left || + (Line.IsMultiVariableDeclStmt && + (Left.NestingLevel == 0 || + (Left.NestingLevel == 1 && Line.First->is(tok::kw_for))))))); } if (Right.is(TT_FunctionTypeLParen) && Left.isNot(tok::l_paren) && (!Left.is(TT_PointerOrReference) || - (Style.PointerAlignment != FormatStyle::PAS_Right && + (getTokenPointerOrReferenceAlignment(Left) != FormatStyle::PAS_Right && !Line.IsMultiVariableDeclStmt))) return true; if (Left.is(TT_PointerOrReference)) { @@ -3013,7 +3016,8 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, 
(Right.is(tok::l_brace) && Right.is(BK_Block)) || (!Right.isOneOf(TT_PointerOrReference, TT_ArraySubscriptLSquare, tok::l_paren) && - (Style.PointerAlignment != FormatStyle::PAS_Right && + (getTokenPointerOrReferenceAlignment(Left) != + FormatStyle::PAS_Right && !Line.IsMultiVariableDeclStmt) && Left.Previous && !Left.Previous->isOneOf(tok::l_paren, tok::coloncolon, @@ -3120,9 +3124,13 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, (Left.is(tok::r_square) && Left.is(TT_AttributeSquare))) return true; if (Style.SpaceBeforeParens == - FormatStyle::SBPO_ControlStatementsExceptForEachMacros && + FormatStyle::SBPO_ControlStatementsExceptControlMacros && Left.is(TT_ForEachMacro)) return false; + if (Style.SpaceBeforeParens == + FormatStyle::SBPO_ControlStatementsExceptControlMacros && + Left.is(TT_IfMacro)) + return false; return Line.Type == LT_ObjCDecl || Left.is(tok::semi) || (Style.SpaceBeforeParens != FormatStyle::SBPO_Never && (Left.isOneOf(tok::pp_elif, tok::kw_for, tok::kw_while, @@ -3178,7 +3186,7 @@ bool TokenAnnotator::spaceRequiredBetween(const AnnotatedLine &Line, // Match const and volatile ref-qualifiers without any additional // qualifiers such as // void Fn() const &; - return Style.PointerAlignment != FormatStyle::PAS_Left; + return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; return true; } @@ -3221,6 +3229,9 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // and "%d %d" if (Left.is(tok::numeric_constant) && Right.is(tok::percent)) return HasExistingWhitespace(); + } else if (Style.isJson()) { + if (Right.is(tok::colon)) + return false; } else if (Style.isCSharp()) { // Require spaces around '{' and before '}' unless they appear in // interpolated strings. Interpolated strings are merged into a single token @@ -3530,11 +3541,11 @@ bool TokenAnnotator::spaceRequiredBefore(const AnnotatedLine &Line, // Space before TT_StructuredBindingLSquare. 
if (Right.is(TT_StructuredBindingLSquare)) return !Left.isOneOf(tok::amp, tok::ampamp) || - Style.PointerAlignment != FormatStyle::PAS_Right; + getTokenReferenceAlignment(Left) != FormatStyle::PAS_Right; // Space before & or && following a TT_StructuredBindingLSquare. if (Right.Next && Right.Next->is(TT_StructuredBindingLSquare) && Right.isOneOf(tok::amp, tok::ampamp)) - return Style.PointerAlignment != FormatStyle::PAS_Left; + return getTokenReferenceAlignment(Right) != FormatStyle::PAS_Left; if ((Right.is(TT_BinaryOperator) && !Left.is(tok::l_paren)) || (Left.isOneOf(TT_BinaryOperator, TT_ConditionalExpr) && !Right.is(tok::r_paren))) @@ -3573,42 +3584,11 @@ isItAnEmptyLambdaAllowed(const FormatToken &Tok, return Tok.Children.empty() && ShortLambdaOption != FormatStyle::SLS_None; } -static bool -isItAInlineLambdaAllowed(const FormatToken &Tok, - FormatStyle::ShortLambdaStyle ShortLambdaOption) { - return (ShortLambdaOption == FormatStyle::SLS_Inline && - IsFunctionArgument(Tok)) || - (ShortLambdaOption == FormatStyle::SLS_All); -} - -static bool isOneChildWithoutMustBreakBefore(const FormatToken &Tok) { - if (Tok.Children.size() != 1) - return false; - FormatToken *curElt = Tok.Children[0]->First; - while (curElt) { - if (curElt->MustBreakBefore) - return false; - curElt = curElt->Next; - } - return true; -} static bool isAllmanLambdaBrace(const FormatToken &Tok) { return (Tok.is(tok::l_brace) && Tok.is(BK_Block) && !Tok.isOneOf(TT_ObjCBlockLBrace, TT_DictLiteral)); } -static bool isAllmanBraceIncludedBreakableLambda( - const FormatToken &Tok, FormatStyle::ShortLambdaStyle ShortLambdaOption) { - if (!isAllmanLambdaBrace(Tok)) - return false; - - if (isItAnEmptyLambdaAllowed(Tok, ShortLambdaOption)) - return false; - - return !isItAInlineLambdaAllowed(Tok, ShortLambdaOption) || - !isOneChildWithoutMustBreakBefore(Tok); -} - bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, const FormatToken &Right) { const FormatToken &Left = *Right.Previous; @@ 
-3695,6 +3675,26 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; } + // Basic JSON newline processing. + if (Style.isJson()) { + // Always break after a JSON record opener. + // { + // } + if (Left.is(TT_DictLiteral) && Left.is(tok::l_brace)) + return true; + // Always break after a JSON array opener. + // [ + // ] + if (Left.is(TT_ArrayInitializerLSquare) && Left.is(tok::l_square) && + !Right.is(tok::r_square)) + return true; + // Always break after successive entries. + // 1, + // 2 + if (Left.is(tok::comma)) + return true; + } + // If the last token before a '}', ']', or ')' is a comma or a trailing // comment, the intention is to insert a line break after it in order to make // shuffling around entries easier. Import statements, especially in @@ -3770,13 +3770,6 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, if (Right.is(TT_InlineASMBrace)) return Right.HasUnescapedNewline; - auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; - if (Style.BraceWrapping.BeforeLambdaBody && - (isAllmanBraceIncludedBreakableLambda(Left, ShortLambdaOption) || - isAllmanBraceIncludedBreakableLambda(Right, ShortLambdaOption))) { - return true; - } - if (isAllmanBrace(Left) || isAllmanBrace(Right)) return (Line.startsWith(tok::kw_enum) && Style.BraceWrapping.AfterEnum) || (Line.startsWith(tok::kw_typedef, tok::kw_enum) && @@ -3799,6 +3792,11 @@ bool TokenAnnotator::mustBreakBefore(const AnnotatedLine &Line, return true; } + if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace) && + Left.isOneOf(tok::star, tok::amp, tok::ampamp, TT_TemplateCloser)) { + return true; + } + // Put multiple Java annotation on a new line. 
if ((Style.Language == FormatStyle::LK_Java || Style.Language == FormatStyle::LK_JavaScript) && @@ -4030,7 +4028,8 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return !Right.is(tok::l_paren); if (Right.is(TT_PointerOrReference)) return Line.IsMultiVariableDeclStmt || - (Style.PointerAlignment == FormatStyle::PAS_Right && + (getTokenPointerOrReferenceAlignment(Right) == + FormatStyle::PAS_Right && (!Right.Next || Right.Next->isNot(TT_FunctionDeclarationName))); if (Right.isOneOf(TT_StartOfName, TT_FunctionDeclarationName) || Right.is(tok::kw_operator)) @@ -4202,7 +4201,7 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, return false; auto ShortLambdaOption = Style.AllowShortLambdasOnASingleLine; - if (Style.BraceWrapping.BeforeLambdaBody) { + if (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) { if (isAllmanLambdaBrace(Left)) return !isItAnEmptyLambdaAllowed(Left, ShortLambdaOption); if (isAllmanLambdaBrace(Right)) @@ -4214,7 +4213,6 @@ bool TokenAnnotator::canBreakBefore(const AnnotatedLine &Line, Right.isMemberAccess() || Right.isOneOf(TT_TrailingReturnArrow, TT_LambdaArrow, tok::lessless, tok::colon, tok::l_square, tok::at) || - (Style.BraceWrapping.BeforeLambdaBody && Right.is(TT_LambdaLBrace)) || (Left.is(tok::r_paren) && Right.isOneOf(tok::identifier, tok::kw_const)) || (Left.is(tok::l_paren) && !Right.is(tok::r_paren)) || @@ -4245,5 +4243,41 @@ void TokenAnnotator::printDebugInfo(const AnnotatedLine &Line) { llvm::errs() << "----\n"; } +FormatStyle::PointerAlignmentStyle +TokenAnnotator::getTokenReferenceAlignment(const FormatToken &Reference) { + assert(Reference.isOneOf(tok::amp, tok::ampamp)); + switch (Style.ReferenceAlignment) { + case FormatStyle::RAS_Pointer: + return Style.PointerAlignment; + case FormatStyle::RAS_Left: + return FormatStyle::PAS_Left; + case FormatStyle::RAS_Right: + return FormatStyle::PAS_Right; + case FormatStyle::RAS_Middle: + return FormatStyle::PAS_Middle; + } + assert(0); 
//"Unhandled value of ReferenceAlignment" + return Style.PointerAlignment; +} + +FormatStyle::PointerAlignmentStyle +TokenAnnotator::getTokenPointerOrReferenceAlignment( + const FormatToken &PointerOrReference) { + if (PointerOrReference.isOneOf(tok::amp, tok::ampamp)) { + switch (Style.ReferenceAlignment) { + case FormatStyle::RAS_Pointer: + return Style.PointerAlignment; + case FormatStyle::RAS_Left: + return FormatStyle::PAS_Left; + case FormatStyle::RAS_Right: + return FormatStyle::PAS_Right; + case FormatStyle::RAS_Middle: + return FormatStyle::PAS_Middle; + } + } + assert(PointerOrReference.is(tok::star)); + return Style.PointerAlignment; +} + } // namespace format } // namespace clang diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index 7f2dff561e7b4..0f9c02dbeb34b 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -195,6 +195,11 @@ class TokenAnnotator { FormatToken *calculateInitializerColumnList(AnnotatedLine &Line, FormatToken *CurrentToken, unsigned Depth); + FormatStyle::PointerAlignmentStyle + getTokenReferenceAlignment(const FormatToken &PointerOrReference); + + FormatStyle::PointerAlignmentStyle + getTokenPointerOrReferenceAlignment(const FormatToken &PointerOrReference); const FormatStyle &Style; diff --git a/clang/lib/Format/UnwrappedLineFormatter.cpp b/clang/lib/Format/UnwrappedLineFormatter.cpp index 41fbb16f0db12..cca85c1074de5 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.cpp +++ b/clang/lib/Format/UnwrappedLineFormatter.cpp @@ -823,8 +823,20 @@ class LineFormatter { return true; if (NewLine) { - int AdditionalIndent = State.Stack.back().Indent - - Previous.Children[0]->Level * Style.IndentWidth; + const ParenState &P = State.Stack.back(); + + int AdditionalIndent = + P.Indent - Previous.Children[0]->Level * Style.IndentWidth; + + if (Style.LambdaBodyIndentation == FormatStyle::LBI_OuterScope && + P.NestedBlockIndent == P.LastSpace) { + if 
(State.NextToken->MatchingParen && + State.NextToken->MatchingParen->is(TT_LambdaLBrace)) { + State.Stack.pop_back(); + } + if (LBrace->is(TT_LambdaLBrace)) + AdditionalIndent = 0; + } Penalty += BlockFormatter->format(Previous.Children, DryRun, AdditionalIndent, @@ -1122,6 +1134,7 @@ unsigned UnwrappedLineFormatter::format( unsigned Penalty = 0; LevelIndentTracker IndentTracker(Style, Keywords, Lines[0]->Level, AdditionalIndent); + const AnnotatedLine *PrevPrevLine = nullptr; const AnnotatedLine *PreviousLine = nullptr; const AnnotatedLine *NextLine = nullptr; @@ -1160,7 +1173,7 @@ unsigned UnwrappedLineFormatter::format( if (ShouldFormat && TheLine.Type != LT_Invalid) { if (!DryRun) { bool LastLine = Line->First->is(tok::eof); - formatFirstToken(TheLine, PreviousLine, Lines, Indent, + formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, Indent, LastLine ? LastStartColumn : NextStartColumn + Indent); } @@ -1206,7 +1219,7 @@ unsigned UnwrappedLineFormatter::format( TheLine.LeadingEmptyLinesAffected); // Format the first token. 
if (ReformatLeadingWhitespace) - formatFirstToken(TheLine, PreviousLine, Lines, + formatFirstToken(TheLine, PreviousLine, PrevPrevLine, Lines, TheLine.First->OriginalColumn, TheLine.First->OriginalColumn); else @@ -1222,6 +1235,7 @@ unsigned UnwrappedLineFormatter::format( } if (!DryRun) markFinalized(TheLine.First); + PrevPrevLine = PreviousLine; PreviousLine = &TheLine; } PenaltyCache[CacheKey] = Penalty; @@ -1230,6 +1244,7 @@ unsigned UnwrappedLineFormatter::format( void UnwrappedLineFormatter::formatFirstToken( const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, + const AnnotatedLine *PrevPrevLine, const SmallVectorImpl &Lines, unsigned Indent, unsigned NewlineIndent) { FormatToken &RootToken = *Line.First; @@ -1261,6 +1276,8 @@ void UnwrappedLineFormatter::formatFirstToken( if (!Style.KeepEmptyLinesAtTheStartOfBlocks && PreviousLine && PreviousLine->Last->is(tok::l_brace) && !PreviousLine->startsWithNamespace() && + !(PrevPrevLine && PrevPrevLine->startsWithNamespace() && + PreviousLine->startsWith(tok::l_brace)) && !startsExternCBlock(*PreviousLine)) Newlines = 1; diff --git a/clang/lib/Format/UnwrappedLineFormatter.h b/clang/lib/Format/UnwrappedLineFormatter.h index a1ff16999589f..3e33de07fa129 100644 --- a/clang/lib/Format/UnwrappedLineFormatter.h +++ b/clang/lib/Format/UnwrappedLineFormatter.h @@ -47,6 +47,7 @@ class UnwrappedLineFormatter { /// of the \c UnwrappedLine if there was no structural parsing error. 
void formatFirstToken(const AnnotatedLine &Line, const AnnotatedLine *PreviousLine, + const AnnotatedLine *PrevPrevLine, const SmallVectorImpl &Lines, unsigned Indent, unsigned NewlineIndent); diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 0fb5428f89673..45ff319b5841d 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -2021,7 +2021,15 @@ void UnwrappedLineParser::parseIfThenElse() { parseBlock(/*MustBeDeclaration=*/false); addUnwrappedLine(); } else if (FormatTok->Tok.is(tok::kw_if)) { + FormatToken *Previous = AllTokens[Tokens->getPosition() - 1]; + bool PrecededByComment = Previous->is(tok::comment); + if (PrecededByComment) { + addUnwrappedLine(); + ++Line->Level; + } parseIfThenElse(); + if (PrecededByComment) + --Line->Level; } else { addUnwrappedLine(); ++Line->Level; diff --git a/clang/lib/Format/UsingDeclarationsSorter.cpp b/clang/lib/Format/UsingDeclarationsSorter.cpp index b6559db61d0cb..5608a5a759537 100644 --- a/clang/lib/Format/UsingDeclarationsSorter.cpp +++ b/clang/lib/Format/UsingDeclarationsSorter.cpp @@ -48,7 +48,7 @@ int compareLabels(StringRef A, StringRef B) { return -1; // Two names within a group compare case-insensitively. - return NamesA[I].compare_lower(NamesB[I]); + return NamesA[I].compare_insensitive(NamesB[I]); } // I is the last index of NamesB and NamesB[I] is a non-namespace name. @@ -57,7 +57,7 @@ int compareLabels(StringRef A, StringRef B) { return 1; // Two namespaces names within a group compare case-insensitively. 
- int C = NamesA[I].compare_lower(NamesB[I]); + int C = NamesA[I].compare_insensitive(NamesB[I]); if (C != 0) return C; } diff --git a/clang/lib/Format/WhitespaceManager.cpp b/clang/lib/Format/WhitespaceManager.cpp index b079eac9803c4..ca2222d1feffb 100644 --- a/clang/lib/Format/WhitespaceManager.cpp +++ b/clang/lib/Format/WhitespaceManager.cpp @@ -353,6 +353,10 @@ AlignTokenSequence(const FormatStyle &Style, unsigned Start, unsigned End, if (Changes[i].Tok->is(TT_ConditionalExpr)) return true; + // Period Initializer .XXX = 1. + if (Changes[i].Tok->is(TT_DesignatedInitializerPeriod)) + return true; + // Continued ternary operator if (Changes[i].Tok->Previous && Changes[i].Tok->Previous->is(TT_ConditionalExpr)) diff --git a/clang/lib/Frontend/CompilerInvocation.cpp b/clang/lib/Frontend/CompilerInvocation.cpp index 19f2f16bea5fa..84465a0c5ae24 100644 --- a/clang/lib/Frontend/CompilerInvocation.cpp +++ b/clang/lib/Frontend/CompilerInvocation.cpp @@ -1865,13 +1865,7 @@ bool CompilerInvocation::ParseCodeGenArgs(CodeGenOptions &Opts, ArgList &Args, << A->getSpelling() << T.str(); const Option &O = A->getOption(); - if (O.matches(OPT_mabi_EQ_vec_default)) - Diags.Report(diag::err_aix_default_altivec_abi) - << A->getSpelling() << T.str(); - else { - assert(O.matches(OPT_mabi_EQ_vec_extabi)); - Opts.EnableAIXExtendedAltivecABI = 1; - } + Opts.EnableAIXExtendedAltivecABI = O.matches(OPT_mabi_EQ_vec_extabi); } bool NeedLocTracking = false; @@ -3481,9 +3475,6 @@ void CompilerInvocation::GenerateLangArgs(const LangOptions &Opts, if (Opts.OpenMPCUDAMode) GenerateArg(Args, OPT_fopenmp_cuda_mode, SA); - if (Opts.OpenMPCUDATargetParallel) - GenerateArg(Args, OPT_fopenmp_cuda_parallel_target_regions, SA); - if (Opts.OpenMPCUDAForceFullRuntime) GenerateArg(Args, OPT_fopenmp_cuda_force_full_runtime, SA); @@ -3953,12 +3944,6 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, Opts.OpenMPCUDAMode = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && 
Args.hasArg(options::OPT_fopenmp_cuda_mode); - // Set CUDA support for parallel execution of target regions for OpenMP target - // NVPTX/AMDGCN if specified in options. - Opts.OpenMPCUDATargetParallel = - Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && - Args.hasArg(options::OPT_fopenmp_cuda_parallel_target_regions); - // Set CUDA mode for OpenMP target NVPTX/AMDGCN if specified in options Opts.OpenMPCUDAForceFullRuntime = Opts.OpenMPIsDevice && (T.isNVPTX() || T.isAMDGCN()) && @@ -4044,13 +4029,13 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, if (Arg *A = Args.getLastArg(OPT_msign_return_address_EQ)) { StringRef SignScope = A->getValue(); - if (SignScope.equals_lower("none")) + if (SignScope.equals_insensitive("none")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::None); - else if (SignScope.equals_lower("all")) + else if (SignScope.equals_insensitive("all")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::All); - else if (SignScope.equals_lower("non-leaf")) + else if (SignScope.equals_insensitive("non-leaf")) Opts.setSignReturnAddressScope( LangOptions::SignReturnAddressScopeKind::NonLeaf); else @@ -4060,10 +4045,10 @@ bool CompilerInvocation::ParseLangArgs(LangOptions &Opts, ArgList &Args, if (Arg *A = Args.getLastArg(OPT_msign_return_address_key_EQ)) { StringRef SignKey = A->getValue(); if (!SignScope.empty() && !SignKey.empty()) { - if (SignKey.equals_lower("a_key")) + if (SignKey.equals_insensitive("a_key")) Opts.setSignReturnAddressKey( LangOptions::SignReturnAddressKeyKind::AKey); - else if (SignKey.equals_lower("b_key")) + else if (SignKey.equals_insensitive("b_key")) Opts.setSignReturnAddressKey( LangOptions::SignReturnAddressKeyKind::BKey); else diff --git a/clang/lib/Frontend/InitPreprocessor.cpp b/clang/lib/Frontend/InitPreprocessor.cpp index 8409736bd56ff..cd29711d312f7 100644 --- a/clang/lib/Frontend/InitPreprocessor.cpp +++ 
b/clang/lib/Frontend/InitPreprocessor.cpp @@ -611,8 +611,10 @@ static void InitializeCPlusPlusFeatureTestMacros(const LangOptions &LangOpts, Builder.defineMacro("__cpp_using_enum", "201907L"); } // C++2b features. - if (LangOpts.CPlusPlus2b) + if (LangOpts.CPlusPlus2b) { + Builder.defineMacro("__cpp_implicit_move", "202011L"); Builder.defineMacro("__cpp_size_t_suffix", "202011L"); + } if (LangOpts.Char8) Builder.defineMacro("__cpp_char8_t", "201811L"); Builder.defineMacro("__cpp_impl_destroying_delete", "201806L"); @@ -1184,6 +1186,9 @@ static void InitializePredefinedMacros(const TargetInfo &TI, if (DeviceTriple.isSPIR() && DeviceSubArch != llvm::Triple::SPIRSubArch_fpga) Builder.defineMacro("SYCL_USE_NATIVE_FP_ATOMICS"); + // Enable generation of USM address spaces for FPGA. + if (DeviceSubArch == llvm::Triple::SPIRSubArch_fpga) + Builder.defineMacro("__ENABLE_USM_ADDR_SPACE__"); } if (LangOpts.SYCLUnnamedLambda) Builder.defineMacro("__SYCL_UNNAMED_LAMBDA__"); diff --git a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp index ac64e1708da6c..b95851e380d28 100644 --- a/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp +++ b/clang/lib/FrontendTool/ExecuteCompilerInvocation.cpp @@ -187,7 +187,7 @@ CreateFrontendAction(CompilerInstance &CI) { bool ExecuteCompilerInvocation(CompilerInstance *Clang) { // Honor -help. 
if (Clang->getFrontendOpts().ShowHelp) { - driver::getDriverOptTable().PrintHelp( + driver::getDriverOptTable().printHelp( llvm::outs(), "clang -cc1 [options] file...", "LLVM 'Clang' Compiler: http://clang.llvm.org", /*Include=*/driver::options::CC1Option, diff --git a/clang/lib/Headers/__clang_hip_cmath.h b/clang/lib/Headers/__clang_hip_cmath.h index b5d7c16ac5e41..7342705434e6b 100644 --- a/clang/lib/Headers/__clang_hip_cmath.h +++ b/clang/lib/Headers/__clang_hip_cmath.h @@ -52,8 +52,46 @@ __DEVICE__ int fpclassify(double __x) { __DEVICE__ float frexp(float __arg, int *__exp) { return ::frexpf(__arg, __exp); } + +#if defined(__OPENMP_AMDGCN__) +// For OpenMP we work around some old system headers that have non-conforming +// `isinf(float)` and `isnan(float)` implementations that return an `int`. We do +// this by providing two versions of these functions, differing only in the +// return type. To avoid conflicting definitions we disable implicit base +// function generation. That means we will end up with two specializations, one +// per type, but only one has a base function defined by the system header. +#pragma omp begin declare variant match( \ + implementation = {extension(disable_implicit_base)}) + +// FIXME: We lack an extension to customize the mangling of the variants, e.g., +// add a suffix. This means we would clash with the names of the variants +// (note that we do not create implicit base functions here). To avoid +// this clash we add a new trait to some of them that is always true +// (this is LLVM after all ;)). It will only influence the mangled name +// of the variants inside the inner region and avoid the clash. 
+#pragma omp begin declare variant match(implementation = {vendor(llvm)}) + +__DEVICE__ int isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ int isinf(double __x) { return ::__isinf(__x); } +__DEVICE__ int isfinite(float __x) { return ::__finitef(__x); } +__DEVICE__ int isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ int isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ int isnan(double __x) { return ::__isnan(__x); } + +#pragma omp end declare variant +#endif // defined(__OPENMP_AMDGCN__) + +__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } +__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isfinite(float __x) { return ::__finitef(__x); } __DEVICE__ bool isfinite(double __x) { return ::__finite(__x); } +__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } +__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } + +#if defined(__OPENMP_AMDGCN__) +#pragma omp end declare variant +#endif // defined(__OPENMP_AMDGCN__) + __DEVICE__ bool isgreater(float __x, float __y) { return __builtin_isgreater(__x, __y); } @@ -66,8 +104,6 @@ __DEVICE__ bool isgreaterequal(float __x, float __y) { __DEVICE__ bool isgreaterequal(double __x, double __y) { return __builtin_isgreaterequal(__x, __y); } -__DEVICE__ bool isinf(float __x) { return ::__isinff(__x); } -__DEVICE__ bool isinf(double __x) { return ::__isinf(__x); } __DEVICE__ bool isless(float __x, float __y) { return __builtin_isless(__x, __y); } @@ -86,8 +122,6 @@ __DEVICE__ bool islessgreater(float __x, float __y) { __DEVICE__ bool islessgreater(double __x, double __y) { return __builtin_islessgreater(__x, __y); } -__DEVICE__ bool isnan(float __x) { return ::__isnanf(__x); } -__DEVICE__ bool isnan(double __x) { return ::__isnan(__x); } __DEVICE__ bool isnormal(float __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isnormal(double __x) { return __builtin_isnormal(__x); } __DEVICE__ bool isunordered(float __x, float __y) { diff --git 
a/clang/lib/Headers/__clang_hip_runtime_wrapper.h b/clang/lib/Headers/__clang_hip_runtime_wrapper.h index 58f148f9a2680..c557796c8fa0a 100644 --- a/clang/lib/Headers/__clang_hip_runtime_wrapper.h +++ b/clang/lib/Headers/__clang_hip_runtime_wrapper.h @@ -51,6 +51,23 @@ typedef __SIZE_TYPE__ size_t; #define nullptr NULL; #endif +#ifdef __cplusplus +extern "C" { + __attribute__((__visibility__("default"))) + __attribute__((weak)) + __attribute__((noreturn)) + __device__ void __cxa_pure_virtual(void) { + __builtin_trap(); + } + __attribute__((__visibility__("default"))) + __attribute__((weak)) + __attribute__((noreturn)) + __device__ void __cxa_deleted_virtual(void) { + __builtin_trap(); + } +} +#endif //__cplusplus + #if __HIP_ENABLE_DEVICE_MALLOC__ extern "C" __device__ void *__hip_malloc(size_t __size); extern "C" __device__ void *__hip_free(void *__ptr); diff --git a/clang/lib/Headers/altivec.h b/clang/lib/Headers/altivec.h index dadf6b5cf75bb..3517da798547a 100644 --- a/clang/lib/Headers/altivec.h +++ b/clang/lib/Headers/altivec.h @@ -312,16 +312,20 @@ vec_add_u128(vector unsigned char __a, vector unsigned char __b) { #elif defined(__VSX__) static __inline__ vector signed long long __ATTRS_o_ai vec_add(vector signed long long __a, vector signed long long __b) { +#ifdef __LITTLE_ENDIAN__ + // Little endian systems on CPU's prior to Power8 don't really exist + // so scalarizing is fine. 
+ return __a + __b; +#else vector unsigned int __res = (vector unsigned int)__a + (vector unsigned int)__b; vector unsigned int __carry = __builtin_altivec_vaddcuw( (vector unsigned int)__a, (vector unsigned int)__b); -#ifdef __LITTLE_ENDIAN__ - __carry = __builtin_shufflevector(__carry, __carry, 3, 0, 1, 2); -#else - __carry = __builtin_shufflevector(__carry, __carry, 1, 2, 3, 0); -#endif + __carry = __builtin_shufflevector((vector unsigned char)__carry, + (vector unsigned char)__carry, 0, 0, 0, 7, + 0, 0, 0, 0, 0, 0, 0, 15, 0, 0, 0, 0); return (vector signed long long)(__res + __carry); +#endif } static __inline__ vector unsigned long long __ATTRS_o_ai diff --git a/clang/lib/Lex/HeaderMap.cpp b/clang/lib/Lex/HeaderMap.cpp index 4b60cfa7b52dd..ae5e6b221953f 100644 --- a/clang/lib/Lex/HeaderMap.cpp +++ b/clang/lib/Lex/HeaderMap.cpp @@ -224,7 +224,7 @@ StringRef HeaderMapImpl::lookupFilename(StringRef Filename, Optional Key = getString(B.Key); if (LLVM_UNLIKELY(!Key)) continue; - if (!Filename.equals_lower(*Key)) + if (!Filename.equals_insensitive(*Key)) continue; // If so, we have a match in the hash table. Construct the destination diff --git a/clang/lib/Lex/PPDirectives.cpp b/clang/lib/Lex/PPDirectives.cpp index b3ebc75be2fa9..80c85b791bd37 100644 --- a/clang/lib/Lex/PPDirectives.cpp +++ b/clang/lib/Lex/PPDirectives.cpp @@ -196,7 +196,7 @@ static MacroDiag shouldWarnOnMacroUndef(Preprocessor &PP, IdentifierInfo *II) { static bool warnByDefaultOnWrongCase(StringRef Include) { // If the first component of the path is "boost", treat this like a standard header // for the purposes of diagnostics. - if (::llvm::sys::path::begin(Include)->equals_lower("boost")) + if (::llvm::sys::path::begin(Include)->equals_insensitive("boost")) return true; // "condition_variable" is the longest standard header name at 18 characters. 
@@ -1723,7 +1723,8 @@ static bool trySimplifyPath(SmallVectorImpl &Components, // If these path components differ by more than just case, then we // may be looking at symlinked paths. Bail on this diagnostic to avoid // noisy false positives. - SuggestReplacement = RealPathComponentIter->equals_lower(Component); + SuggestReplacement = + RealPathComponentIter->equals_insensitive(Component); if (!SuggestReplacement) break; Component = *RealPathComponentIter; diff --git a/clang/lib/Lex/PPMacroExpansion.cpp b/clang/lib/Lex/PPMacroExpansion.cpp index e1a9cb78e8e57..6cf5d05c41ece 100644 --- a/clang/lib/Lex/PPMacroExpansion.cpp +++ b/clang/lib/Lex/PPMacroExpansion.cpp @@ -1430,7 +1430,7 @@ static bool isTargetVendor(const TargetInfo &TI, const IdentifierInfo *II) { StringRef VendorName = TI.getTriple().getVendorName(); if (VendorName.empty()) VendorName = "unknown"; - return VendorName.equals_lower(II->getName()); + return VendorName.equals_insensitive(II->getName()); } /// Implements the __is_target_os builtin macro. diff --git a/clang/lib/Lex/Pragma.cpp b/clang/lib/Lex/Pragma.cpp index 5b42241a32c2e..081b92ac21d9a 100644 --- a/clang/lib/Lex/Pragma.cpp +++ b/clang/lib/Lex/Pragma.cpp @@ -1955,6 +1955,7 @@ void Preprocessor::RegisterBuiltinPragmas() { AddPragmaHandler(new PragmaExecCharsetHandler()); AddPragmaHandler(new PragmaIncludeAliasHandler()); AddPragmaHandler(new PragmaHdrstopHandler()); + AddPragmaHandler(new PragmaSystemHeaderHandler()); } // Pragmas added by plugins diff --git a/clang/lib/Parse/ParseExprCXX.cpp b/clang/lib/Parse/ParseExprCXX.cpp index 93f578edc09e6..f3d10b4a08895 100644 --- a/clang/lib/Parse/ParseExprCXX.cpp +++ b/clang/lib/Parse/ParseExprCXX.cpp @@ -9,7 +9,6 @@ // This file implements the Expression parsing implementation for C++. 
// //===----------------------------------------------------------------------===// -#include "clang/Parse/Parser.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" #include "clang/AST/DeclTemplate.h" @@ -17,6 +16,7 @@ #include "clang/Basic/PrettyStackTrace.h" #include "clang/Lex/LiteralSupport.h" #include "clang/Parse/ParseDiagnostic.h" +#include "clang/Parse/Parser.h" #include "clang/Parse/RAIIObjectsForParser.h" #include "clang/Sema/DeclSpec.h" #include "clang/Sema/ParsedTemplate.h" @@ -2636,9 +2636,10 @@ bool Parser::ParseUnqualifiedIdOperator(CXXScopeSpec &SS, bool EnteringContext, // Grab the literal operator's suffix, which will be either the next token // or a ud-suffix from the string literal. + bool IsUDSuffix = !Literal.getUDSuffix().empty(); IdentifierInfo *II = nullptr; SourceLocation SuffixLoc; - if (!Literal.getUDSuffix().empty()) { + if (IsUDSuffix) { II = &PP.getIdentifierTable().get(Literal.getUDSuffix()); SuffixLoc = Lexer::AdvanceToTokenCharacter(TokLocs[Literal.getUDSuffixToken()], @@ -2675,7 +2676,7 @@ bool Parser::ParseUnqualifiedIdOperator(CXXScopeSpec &SS, bool EnteringContext, Result.setLiteralOperatorId(II, KeywordLoc, SuffixLoc); - return Actions.checkLiteralOperatorId(SS, Result); + return Actions.checkLiteralOperatorId(SS, Result, IsUDSuffix); } // Parse a conversion-function-id. 
diff --git a/clang/lib/Sema/CodeCompleteConsumer.cpp b/clang/lib/Sema/CodeCompleteConsumer.cpp index 678a09ba10034..3ab2a18f5e8d5 100644 --- a/clang/lib/Sema/CodeCompleteConsumer.cpp +++ b/clang/lib/Sema/CodeCompleteConsumer.cpp @@ -755,7 +755,7 @@ bool clang::operator<(const CodeCompletionResult &X, std::string XSaved, YSaved; StringRef XStr = X.getOrderedName(XSaved); StringRef YStr = Y.getOrderedName(YSaved); - int cmp = XStr.compare_lower(YStr); + int cmp = XStr.compare_insensitive(YStr); if (cmp) return cmp < 0; diff --git a/clang/lib/Sema/Sema.cpp b/clang/lib/Sema/Sema.cpp index a52f7b45623a8..72b3f687560d6 100644 --- a/clang/lib/Sema/Sema.cpp +++ b/clang/lib/Sema/Sema.cpp @@ -1856,7 +1856,7 @@ Sema::SemaDiagnosticBuilder Sema::Diag(SourceLocation Loc, unsigned DiagID, bool IsError = Diags.getDiagnosticIDs()->isDefaultMappingAsError(DiagID); bool ShouldDefer = getLangOpts().CUDA && LangOpts.GPUDeferDiag && DiagnosticIDs::isDeferrable(DiagID) && - (DeferHint || !IsError); + (DeferHint || DeferDiags || !IsError); auto SetIsLastErrorImmediate = [&](bool Flag) { if (IsError) IsLastErrorImmediate = Flag; diff --git a/clang/lib/Sema/SemaChecking.cpp b/clang/lib/Sema/SemaChecking.cpp index 908bd4a829c70..5fb96aa97f111 100644 --- a/clang/lib/Sema/SemaChecking.cpp +++ b/clang/lib/Sema/SemaChecking.cpp @@ -3280,34 +3280,33 @@ static QualType DecodePPCMMATypeFromStr(ASTContext &Context, const char *&Str, } } +static bool isPPC_64Builtin(unsigned BuiltinID) { + // These builtins only work on PPC 64bit targets. 
+ switch (BuiltinID) { + case PPC::BI__builtin_divde: + case PPC::BI__builtin_divdeu: + case PPC::BI__builtin_bpermd: + return true; + } + return false; +} + +static bool SemaFeatureCheck(Sema &S, CallExpr *TheCall, + StringRef FeatureToCheck, unsigned DiagID) { + if (!S.Context.getTargetInfo().hasFeature(FeatureToCheck)) + return S.Diag(TheCall->getBeginLoc(), DiagID) << TheCall->getSourceRange(); + return false; +} + bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, CallExpr *TheCall) { unsigned i = 0, l = 0, u = 0; - bool Is64BitBltin = BuiltinID == PPC::BI__builtin_divde || - BuiltinID == PPC::BI__builtin_divdeu || - BuiltinID == PPC::BI__builtin_bpermd; bool IsTarget64Bit = TI.getTypeWidth(TI.getIntPtrType()) == 64; - bool IsBltinExtDiv = BuiltinID == PPC::BI__builtin_divwe || - BuiltinID == PPC::BI__builtin_divweu || - BuiltinID == PPC::BI__builtin_divde || - BuiltinID == PPC::BI__builtin_divdeu; - if (Is64BitBltin && !IsTarget64Bit) + if (isPPC_64Builtin(BuiltinID) && !IsTarget64Bit) return Diag(TheCall->getBeginLoc(), diag::err_64_bit_builtin_32_bit_tgt) << TheCall->getSourceRange(); - if ((IsBltinExtDiv && !TI.hasFeature("extdiv")) || - (BuiltinID == PPC::BI__builtin_bpermd && !TI.hasFeature("bpermd"))) - return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_only_on_pwr7) - << TheCall->getSourceRange(); - - auto SemaVSXCheck = [&](CallExpr *TheCall) -> bool { - if (!TI.hasFeature("vsx")) - return Diag(TheCall->getBeginLoc(), diag::err_ppc_builtin_only_on_pwr7) - << TheCall->getSourceRange(); - return false; - }; - switch (BuiltinID) { default: return false; case PPC::BI__builtin_altivec_crypto_vshasigmaw: @@ -3333,11 +3332,22 @@ bool Sema::CheckPPCBuiltinFunctionCall(const TargetInfo &TI, unsigned BuiltinID, case PPC::BI__builtin_vsx_xxpermdi: case PPC::BI__builtin_vsx_xxsldwi: return SemaBuiltinVSX(TheCall); + case PPC::BI__builtin_divwe: + case PPC::BI__builtin_divweu: + case PPC::BI__builtin_divde: + case 
PPC::BI__builtin_divdeu: + return SemaFeatureCheck(*this, TheCall, "extdiv", + diag::err_ppc_builtin_only_on_pwr7); + case PPC::BI__builtin_bpermd: + return SemaFeatureCheck(*this, TheCall, "bpermd", + diag::err_ppc_builtin_only_on_pwr7); case PPC::BI__builtin_unpack_vector_int128: - return SemaVSXCheck(TheCall) || + return SemaFeatureCheck(*this, TheCall, "vsx", + diag::err_ppc_builtin_only_on_pwr7) || SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); case PPC::BI__builtin_pack_vector_int128: - return SemaVSXCheck(TheCall); + return SemaFeatureCheck(*this, TheCall, "vsx", + diag::err_ppc_builtin_only_on_pwr7); case PPC::BI__builtin_altivec_vgnb: return SemaBuiltinConstantArgRange(TheCall, 1, 2, 7); case PPC::BI__builtin_altivec_vec_replace_elt: @@ -3503,6 +3513,132 @@ bool Sema::CheckRISCVBuiltinFunctionCall(const TargetInfo &TI, case RISCV::BI__builtin_rvv_vsetvlimax: return SemaBuiltinConstantArgRange(TheCall, 0, 0, 3) || CheckRISCVLMUL(TheCall, 1); + case RISCV::BI__builtin_rvv_vget_v_i8m2_i8m1: + case RISCV::BI__builtin_rvv_vget_v_i16m2_i16m1: + case RISCV::BI__builtin_rvv_vget_v_i32m2_i32m1: + case RISCV::BI__builtin_rvv_vget_v_i64m2_i64m1: + case RISCV::BI__builtin_rvv_vget_v_f32m2_f32m1: + case RISCV::BI__builtin_rvv_vget_v_f64m2_f64m1: + case RISCV::BI__builtin_rvv_vget_v_u8m2_u8m1: + case RISCV::BI__builtin_rvv_vget_v_u16m2_u16m1: + case RISCV::BI__builtin_rvv_vget_v_u32m2_u32m1: + case RISCV::BI__builtin_rvv_vget_v_u64m2_u64m1: + case RISCV::BI__builtin_rvv_vget_v_i8m4_i8m2: + case RISCV::BI__builtin_rvv_vget_v_i16m4_i16m2: + case RISCV::BI__builtin_rvv_vget_v_i32m4_i32m2: + case RISCV::BI__builtin_rvv_vget_v_i64m4_i64m2: + case RISCV::BI__builtin_rvv_vget_v_f32m4_f32m2: + case RISCV::BI__builtin_rvv_vget_v_f64m4_f64m2: + case RISCV::BI__builtin_rvv_vget_v_u8m4_u8m2: + case RISCV::BI__builtin_rvv_vget_v_u16m4_u16m2: + case RISCV::BI__builtin_rvv_vget_v_u32m4_u32m2: + case RISCV::BI__builtin_rvv_vget_v_u64m4_u64m2: + case 
RISCV::BI__builtin_rvv_vget_v_i8m8_i8m4: + case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m4: + case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m4: + case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m4: + case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m4: + case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m4: + case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m4: + case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m4: + case RISCV::BI__builtin_rvv_vget_v_u32m8_u32m4: + case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m4: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case RISCV::BI__builtin_rvv_vget_v_i8m4_i8m1: + case RISCV::BI__builtin_rvv_vget_v_i16m4_i16m1: + case RISCV::BI__builtin_rvv_vget_v_i32m4_i32m1: + case RISCV::BI__builtin_rvv_vget_v_i64m4_i64m1: + case RISCV::BI__builtin_rvv_vget_v_f32m4_f32m1: + case RISCV::BI__builtin_rvv_vget_v_f64m4_f64m1: + case RISCV::BI__builtin_rvv_vget_v_u8m4_u8m1: + case RISCV::BI__builtin_rvv_vget_v_u16m4_u16m1: + case RISCV::BI__builtin_rvv_vget_v_u32m4_u32m1: + case RISCV::BI__builtin_rvv_vget_v_u64m4_u64m1: + case RISCV::BI__builtin_rvv_vget_v_i8m8_i8m2: + case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m2: + case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m2: + case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m2: + case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m2: + case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m2: + case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m2: + case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m2: + case RISCV::BI__builtin_rvv_vget_v_u32m8_u32m2: + case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m2: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case RISCV::BI__builtin_rvv_vget_v_i8m8_i8m1: + case RISCV::BI__builtin_rvv_vget_v_i16m8_i16m1: + case RISCV::BI__builtin_rvv_vget_v_i32m8_i32m1: + case RISCV::BI__builtin_rvv_vget_v_i64m8_i64m1: + case RISCV::BI__builtin_rvv_vget_v_f32m8_f32m1: + case RISCV::BI__builtin_rvv_vget_v_f64m8_f64m1: + case RISCV::BI__builtin_rvv_vget_v_u8m8_u8m1: + case RISCV::BI__builtin_rvv_vget_v_u16m8_u16m1: + case 
RISCV::BI__builtin_rvv_vget_v_u32m8_u32m1: + case RISCV::BI__builtin_rvv_vget_v_u64m8_u64m1: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); + case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m2: + case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m2: + case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m2: + case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m2: + case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m2: + case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m2: + case RISCV::BI__builtin_rvv_vset_v_u8m1_u8m2: + case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m2: + case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m2: + case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m2: + case RISCV::BI__builtin_rvv_vset_v_i8m2_i8m4: + case RISCV::BI__builtin_rvv_vset_v_i16m2_i16m4: + case RISCV::BI__builtin_rvv_vset_v_i32m2_i32m4: + case RISCV::BI__builtin_rvv_vset_v_i64m2_i64m4: + case RISCV::BI__builtin_rvv_vset_v_f32m2_f32m4: + case RISCV::BI__builtin_rvv_vset_v_f64m2_f64m4: + case RISCV::BI__builtin_rvv_vset_v_u8m2_u8m4: + case RISCV::BI__builtin_rvv_vset_v_u16m2_u16m4: + case RISCV::BI__builtin_rvv_vset_v_u32m2_u32m4: + case RISCV::BI__builtin_rvv_vset_v_u64m2_u64m4: + case RISCV::BI__builtin_rvv_vset_v_i8m4_i8m8: + case RISCV::BI__builtin_rvv_vset_v_i16m4_i16m8: + case RISCV::BI__builtin_rvv_vset_v_i32m4_i32m8: + case RISCV::BI__builtin_rvv_vset_v_i64m4_i64m8: + case RISCV::BI__builtin_rvv_vset_v_f32m4_f32m8: + case RISCV::BI__builtin_rvv_vset_v_f64m4_f64m8: + case RISCV::BI__builtin_rvv_vset_v_u8m4_u8m8: + case RISCV::BI__builtin_rvv_vset_v_u16m4_u16m8: + case RISCV::BI__builtin_rvv_vset_v_u32m4_u32m8: + case RISCV::BI__builtin_rvv_vset_v_u64m4_u64m8: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 1); + case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m4: + case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m4: + case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m4: + case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m4: + case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m4: + case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m4: + case 
RISCV::BI__builtin_rvv_vset_v_u8m1_u8m4: + case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m4: + case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m4: + case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m4: + case RISCV::BI__builtin_rvv_vset_v_i8m2_i8m8: + case RISCV::BI__builtin_rvv_vset_v_i16m2_i16m8: + case RISCV::BI__builtin_rvv_vset_v_i32m2_i32m8: + case RISCV::BI__builtin_rvv_vset_v_i64m2_i64m8: + case RISCV::BI__builtin_rvv_vset_v_f32m2_f32m8: + case RISCV::BI__builtin_rvv_vset_v_f64m2_f64m8: + case RISCV::BI__builtin_rvv_vset_v_u8m2_u8m8: + case RISCV::BI__builtin_rvv_vset_v_u16m2_u16m8: + case RISCV::BI__builtin_rvv_vset_v_u32m2_u32m8: + case RISCV::BI__builtin_rvv_vset_v_u64m2_u64m8: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 3); + case RISCV::BI__builtin_rvv_vset_v_i8m1_i8m8: + case RISCV::BI__builtin_rvv_vset_v_i16m1_i16m8: + case RISCV::BI__builtin_rvv_vset_v_i32m1_i32m8: + case RISCV::BI__builtin_rvv_vset_v_i64m1_i64m8: + case RISCV::BI__builtin_rvv_vset_v_f32m1_f32m8: + case RISCV::BI__builtin_rvv_vset_v_f64m1_f64m8: + case RISCV::BI__builtin_rvv_vset_v_u8m1_u8m8: + case RISCV::BI__builtin_rvv_vset_v_u16m1_u16m8: + case RISCV::BI__builtin_rvv_vset_v_u32m1_u32m8: + case RISCV::BI__builtin_rvv_vset_v_u64m1_u64m8: + return SemaBuiltinConstantArgRange(TheCall, 1, 0, 7); } return false; @@ -7100,18 +7236,18 @@ bool Sema::SemaBuiltinARMSpecialReg(unsigned BuiltinID, CallExpr *TheCall, bool ValidString = true; if (IsARMBuiltin) { - ValidString &= Fields[0].startswith_lower("cp") || - Fields[0].startswith_lower("p"); + ValidString &= Fields[0].startswith_insensitive("cp") || + Fields[0].startswith_insensitive("p"); if (ValidString) - Fields[0] = - Fields[0].drop_front(Fields[0].startswith_lower("cp") ? 2 : 1); + Fields[0] = Fields[0].drop_front( + Fields[0].startswith_insensitive("cp") ? 
2 : 1); - ValidString &= Fields[2].startswith_lower("c"); + ValidString &= Fields[2].startswith_insensitive("c"); if (ValidString) Fields[2] = Fields[2].drop_front(1); if (FiveFields) { - ValidString &= Fields[3].startswith_lower("c"); + ValidString &= Fields[3].startswith_insensitive("c"); if (ValidString) Fields[3] = Fields[3].drop_front(1); } diff --git a/clang/lib/Sema/SemaCodeComplete.cpp b/clang/lib/Sema/SemaCodeComplete.cpp index 1ab9c50709a9d..e03b671ae61e7 100644 --- a/clang/lib/Sema/SemaCodeComplete.cpp +++ b/clang/lib/Sema/SemaCodeComplete.cpp @@ -9491,10 +9491,10 @@ void Sema::CodeCompleteIncludedFile(llvm::StringRef Dir, bool Angled) { // Only files that really look like headers. (Except in system dirs). if (!IsSystem) { // Header extensions from Types.def, which we can't depend on here. - if (!(Filename.endswith_lower(".h") || - Filename.endswith_lower(".hh") || - Filename.endswith_lower(".hpp") || - Filename.endswith_lower(".inc"))) + if (!(Filename.endswith_insensitive(".h") || + Filename.endswith_insensitive(".hh") || + Filename.endswith_insensitive(".hpp") || + Filename.endswith_insensitive(".inc"))) break; } AddCompletion(Filename, /*IsDirectory=*/false); diff --git a/clang/lib/Sema/SemaDeclAttr.cpp b/clang/lib/Sema/SemaDeclAttr.cpp index 238464073ccca..3b0b7d9500edb 100644 --- a/clang/lib/Sema/SemaDeclAttr.cpp +++ b/clang/lib/Sema/SemaDeclAttr.cpp @@ -6798,7 +6798,10 @@ static bool ArmCdeAliasValid(unsigned BuiltinID, StringRef AliasName) { return ArmBuiltinAliasValid(BuiltinID, AliasName, Map, IntrinNames); } -static bool ArmSveAliasValid(unsigned BuiltinID, StringRef AliasName) { +static bool ArmSveAliasValid(ASTContext &Context, unsigned BuiltinID, + StringRef AliasName) { + if (Context.BuiltinInfo.isAuxBuiltinID(BuiltinID)) + BuiltinID = Context.BuiltinInfo.getAuxBuiltinID(BuiltinID); return BuiltinID >= AArch64::FirstSVEBuiltin && BuiltinID <= AArch64::LastSVEBuiltin; } @@ -6815,7 +6818,7 @@ static void handleArmBuiltinAliasAttr(Sema &S, 
Decl *D, const ParsedAttr &AL) { StringRef AliasName = cast(D)->getIdentifier()->getName(); bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64(); - if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) || + if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) || (!IsAArch64 && !ArmMveAliasValid(BuiltinID, AliasName) && !ArmCdeAliasValid(BuiltinID, AliasName))) { S.Diag(AL.getLoc(), diag::err_attribute_arm_builtin_alias); @@ -6845,7 +6848,7 @@ static void handleBuiltinAliasAttr(Sema &S, Decl *D, bool IsAArch64 = S.Context.getTargetInfo().getTriple().isAArch64(); bool IsARM = S.Context.getTargetInfo().getTriple().isARM(); bool IsRISCV = S.Context.getTargetInfo().getTriple().isRISCV(); - if ((IsAArch64 && !ArmSveAliasValid(BuiltinID, AliasName)) || + if ((IsAArch64 && !ArmSveAliasValid(S.Context, BuiltinID, AliasName)) || (IsARM && !ArmMveAliasValid(BuiltinID, AliasName) && !ArmCdeAliasValid(BuiltinID, AliasName)) || (IsRISCV && !RISCVAliasValid(BuiltinID, AliasName)) || diff --git a/clang/lib/Sema/SemaDeclCXX.cpp b/clang/lib/Sema/SemaDeclCXX.cpp index 5109f1e877a26..83c97626ff7e7 100644 --- a/clang/lib/Sema/SemaDeclCXX.cpp +++ b/clang/lib/Sema/SemaDeclCXX.cpp @@ -7292,6 +7292,9 @@ void Sema::CheckExplicitlyDefaultedFunction(Scope *S, FunctionDecl *FD) { return; } + if (DefKind.isComparison()) + UnusedPrivateFields.clear(); + if (DefKind.isSpecialMember() ? CheckExplicitlyDefaultedSpecialMember(cast(FD), DefKind.asSpecialMember()) @@ -7866,15 +7869,6 @@ class DefaultedComparisonAnalyzer assert(Best->BuiltinParamTypes[2].isNull() && "invalid builtin comparison"); - // The builtin operator for relational comparisons on function - // pointers is the only known case which cannot be used. 
- if (OO != OO_EqualEqual && T->isFunctionPointerType()) { - if (Diagnose == ExplainDeleted) - S.Diag(Subobj.Loc, diag::note_defaulted_comparison_selected_invalid) - << Subobj.Kind << Subobj.Decl << T; - return Result::deleted(); - } - if (NeedsDeducing) { Optional Cat = getComparisonCategoryForBuiltinCmp(T); @@ -13199,6 +13193,16 @@ void Sema::setupImplicitSpecialMemberType(CXXMethodDecl *SpecialMem, auto QT = Context.getFunctionType(ResultTy, Args, EPI); SpecialMem->setType(QT); + + // During template instantiation of implicit special member functions we need + // a reliable TypeSourceInfo for the function prototype in order to allow + // functions to be substituted. + if (inTemplateInstantiation() && + cast(SpecialMem->getParent())->isLambda()) { + TypeSourceInfo *TSI = + Context.getTrivialTypeSourceInfo(SpecialMem->getType()); + SpecialMem->setTypeSourceInfo(TSI); + } } CXXConstructorDecl *Sema::DeclareImplicitDefaultConstructor( @@ -14877,12 +14881,18 @@ CXXConstructorDecl *Sema::DeclareImplicitCopyConstructor( setupImplicitSpecialMemberType(CopyConstructor, Context.VoidTy, ArgType); + // During template instantiation of special member functions we need a + // reliable TypeSourceInfo for the parameter types in order to allow functions + // to be substituted. + TypeSourceInfo *TSI = nullptr; + if (inTemplateInstantiation() && ClassDecl->isLambda()) + TSI = Context.getTrivialTypeSourceInfo(ArgType); + // Add the parameter to the constructor. 
- ParmVarDecl *FromParam = ParmVarDecl::Create(Context, CopyConstructor, - ClassLoc, ClassLoc, - /*IdentifierInfo=*/nullptr, - ArgType, /*TInfo=*/nullptr, - SC_None, nullptr); + ParmVarDecl *FromParam = + ParmVarDecl::Create(Context, CopyConstructor, ClassLoc, ClassLoc, + /*IdentifierInfo=*/nullptr, ArgType, + /*TInfo=*/TSI, SC_None, nullptr); CopyConstructor->setParams(FromParam); CopyConstructor->setTrivial( diff --git a/clang/lib/Sema/SemaExpr.cpp b/clang/lib/Sema/SemaExpr.cpp index 6ab200953744f..b4eef01e14cef 100644 --- a/clang/lib/Sema/SemaExpr.cpp +++ b/clang/lib/Sema/SemaExpr.cpp @@ -11890,6 +11890,21 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, LHS.get()->getSourceRange()); } + if (IsOrdered && LHSType->isFunctionPointerType() && + RHSType->isFunctionPointerType()) { + // Valid unless a relational comparison of function pointers + bool IsError = Opc == BO_Cmp; + auto DiagID = + IsError ? diag::err_typecheck_ordered_comparison_of_function_pointers + : getLangOpts().CPlusPlus + ? 
diag::warn_typecheck_ordered_comparison_of_function_pointers + : diag::ext_typecheck_ordered_comparison_of_function_pointers; + Diag(Loc, DiagID) << LHSType << RHSType << LHS.get()->getSourceRange() + << RHS.get()->getSourceRange(); + if (IsError) + return QualType(); + } + if ((LHSType->isIntegerType() && !LHSIsNull) || (RHSType->isIntegerType() && !RHSIsNull)) { // Skip normal pointer conversion checks in this case; we have better @@ -11957,12 +11972,6 @@ QualType Sema::CheckCompareOperands(ExprResult &LHS, ExprResult &RHS, << LHSType << RHSType << LCanPointeeTy->isIncompleteType() << RCanPointeeTy->isIncompleteType(); } - if (LCanPointeeTy->isFunctionType()) { - // Valid unless a relational comparison of function pointers - Diag(Loc, diag::ext_typecheck_ordered_comparison_of_function_pointers) - << LHSType << RHSType << LHS.get()->getSourceRange() - << RHS.get()->getSourceRange(); - } } } else if (!IsRelational && (LCanPointeeTy->isVoidType() || RCanPointeeTy->isVoidType())) { diff --git a/clang/lib/Sema/SemaExprCXX.cpp b/clang/lib/Sema/SemaExprCXX.cpp index be8eab660aa05..c3ae4494afd97 100644 --- a/clang/lib/Sema/SemaExprCXX.cpp +++ b/clang/lib/Sema/SemaExprCXX.cpp @@ -484,8 +484,25 @@ ParsedType Sema::getDestructorTypeForDecltype(const DeclSpec &DS, } bool Sema::checkLiteralOperatorId(const CXXScopeSpec &SS, - const UnqualifiedId &Name) { + const UnqualifiedId &Name, bool IsUDSuffix) { assert(Name.getKind() == UnqualifiedIdKind::IK_LiteralOperatorId); + if (!IsUDSuffix) { + // [over.literal] p8 + // + // double operator""_Bq(long double); // OK: not a reserved identifier + // double operator"" _Bq(long double); // ill-formed, no diagnostic required + IdentifierInfo *II = Name.Identifier; + ReservedIdentifierStatus Status = II->isReserved(PP.getLangOpts()); + SourceLocation Loc = Name.getEndLoc(); + if (Status != ReservedIdentifierStatus::NotReserved && + !PP.getSourceManager().isInSystemHeader(Loc)) { + Diag(Loc, diag::warn_reserved_extern_symbol) + << II << 
static_cast(Status) + << FixItHint::CreateReplacement( + Name.getSourceRange(), + (StringRef("operator\"\"") + II->getName()).str()); + } + } if (!SS.isValid()) return false; diff --git a/clang/lib/Sema/SemaOverload.cpp b/clang/lib/Sema/SemaOverload.cpp index 2801c3cff8afc..8092f0be6ccec 100644 --- a/clang/lib/Sema/SemaOverload.cpp +++ b/clang/lib/Sema/SemaOverload.cpp @@ -8466,7 +8466,7 @@ class BuiltinOperatorOverloadBuilder { // bool operator==(T, T); // bool operator!=(T, T); // R operator<=>(T, T) - void addGenericBinaryPointerOrEnumeralOverloads() { + void addGenericBinaryPointerOrEnumeralOverloads(bool IsSpaceship) { // C++ [over.match.oper]p3: // [...]the built-in candidates include all of the candidate operator // functions defined in 13.6 that, compared to the given operator, [...] @@ -8525,6 +8525,8 @@ class BuiltinOperatorOverloadBuilder { // Don't add the same builtin candidate twice. if (!AddedTypes.insert(S.Context.getCanonicalType(PtrTy)).second) continue; + if (IsSpaceship && PtrTy->isFunctionPointerType()) + continue; QualType ParamTypes[2] = {PtrTy, PtrTy}; S.AddBuiltinCandidate(ParamTypes, Args, CandidateSet); @@ -8715,7 +8717,7 @@ class BuiltinOperatorOverloadBuilder { // // where LR is the result of the usual arithmetic conversions // between types L and R. 
- void addBinaryBitwiseArithmeticOverloads(OverloadedOperatorKind Op) { + void addBinaryBitwiseArithmeticOverloads() { if (!HasArithmeticOrEnumeralCandidateType) return; @@ -9221,18 +9223,20 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op, case OO_EqualEqual: case OO_ExclaimEqual: OpBuilder.addEqualEqualOrNotEqualMemberPointerOrNullptrOverloads(); - LLVM_FALLTHROUGH; + OpBuilder.addGenericBinaryPointerOrEnumeralOverloads(/*IsSpaceship=*/false); + OpBuilder.addGenericBinaryArithmeticOverloads(); + break; case OO_Less: case OO_Greater: case OO_LessEqual: case OO_GreaterEqual: - OpBuilder.addGenericBinaryPointerOrEnumeralOverloads(); + OpBuilder.addGenericBinaryPointerOrEnumeralOverloads(/*IsSpaceship=*/false); OpBuilder.addGenericBinaryArithmeticOverloads(); break; case OO_Spaceship: - OpBuilder.addGenericBinaryPointerOrEnumeralOverloads(); + OpBuilder.addGenericBinaryPointerOrEnumeralOverloads(/*IsSpaceship=*/true); OpBuilder.addThreeWayArithmeticOverloads(); break; @@ -9241,7 +9245,7 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op, case OO_Pipe: case OO_LessLess: case OO_GreaterGreater: - OpBuilder.addBinaryBitwiseArithmeticOverloads(Op); + OpBuilder.addBinaryBitwiseArithmeticOverloads(); break; case OO_Amp: // '&' is either unary or binary @@ -9251,7 +9255,7 @@ void Sema::AddBuiltinOperatorCandidates(OverloadedOperatorKind Op, // operator '->', the built-in candidates set is empty. 
break; - OpBuilder.addBinaryBitwiseArithmeticOverloads(Op); + OpBuilder.addBinaryBitwiseArithmeticOverloads(); break; case OO_Tilde: @@ -11641,7 +11645,8 @@ bool OverloadCandidateSet::shouldDeferDiags(Sema &S, ArrayRef Args, CompleteCandidates(S, OCD_AllCandidates, Args, OpLoc, [](auto &Cand) { return (Cand.Viable == false && Cand.FailureKind == ovl_fail_bad_target) || - (Cand.Function->template hasAttr() && + (Cand.Function && + Cand.Function->template hasAttr() && Cand.Function->template hasAttr()); }); DeferHint = !WrongSidedCands.empty(); @@ -13820,6 +13825,8 @@ ExprResult Sema::CreateOverloadedBinOp(SourceLocation OpLoc, StringRef OpcStr = BinaryOperator::getOpcodeStr(Opc); auto Cands = CandidateSet.CompleteCandidates(*this, OCD_AllCandidates, Args, OpLoc); + DeferDiagsRAII DDR(*this, + CandidateSet.shouldDeferDiags(*this, Args, OpLoc)); if (Args[0]->getType()->isRecordType() && Opc >= BO_Assign && Opc <= BO_OrAssign) { Diag(OpLoc, diag::err_ovl_no_viable_oper) diff --git a/clang/lib/Sema/SemaSYCL.cpp b/clang/lib/Sema/SemaSYCL.cpp index a51ae72667e4c..1170ca0783169 100644 --- a/clang/lib/Sema/SemaSYCL.cpp +++ b/clang/lib/Sema/SemaSYCL.cpp @@ -1571,26 +1571,6 @@ class SyclKernelFieldChecker : public SyclKernelFieldHandler { << FieldTy; } - if (SemaRef.getASTContext().getLangOpts().SYCLStdLayoutKernelParams) - if (!FieldTy->isStandardLayoutType()) - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_std_layout_type) - << FieldTy; - - if (!FieldTy->isStructureOrClassType()) - return false; - - CXXRecordDecl *RD = - cast(FieldTy->getAs()->getDecl()); - if (!RD->hasTrivialCopyConstructor()) - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_trivially_copy_ctor_dtor_type) - << 0 << FieldTy; - if (!RD->hasTrivialDestructor()) - return Diag.Report(FD->getLocation(), - diag::err_sycl_non_trivially_copy_ctor_dtor_type) - << 1 << FieldTy; - return false; } @@ -2425,7 +2405,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { unsigned 
KernelArgSize = SemaRef.getASTContext().getTypeSizeInChars(KernelArgType).getQuantity(); - SemaRef.getDiagnostics().getSYCLOptReportHandler().AddKernelArgs( + SemaRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( DC.getKernelDecl(), NameToEmitInDescription, isWrappedField ? "Compiler generated" : KernelArgType.getAsString(), KernelInvocationLoc, KernelArgSize, getKernelArgDesc(KernelArgDesc), @@ -2448,7 +2428,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { KernelArgDescription KernelArgDesc) { unsigned KernelArgSize = SemaRef.getASTContext().getTypeSizeInChars(KernelArgType).getQuantity(); - SemaRef.getDiagnostics().getSYCLOptReportHandler().AddKernelArgs( + SemaRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( DC.getKernelDecl(), KernelArgType.getAsString(), KernelArgType.getAsString(), KernelInvocationLoc, KernelArgSize, getKernelArgDesc(KernelArgDesc), ""); @@ -2458,7 +2438,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { void addParam(QualType KernelArgType, KernelArgDescription KernelArgDesc) { unsigned KernelArgSize = SemaRef.getASTContext().getTypeSizeInChars(KernelArgType).getQuantity(); - SemaRef.getDiagnostics().getSYCLOptReportHandler().AddKernelArgs( + SemaRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( DC.getKernelDecl(), "", KernelArgType.getAsString(), KernelInvocationLoc, KernelArgSize, getKernelArgDesc(KernelArgDesc), ""); @@ -2490,7 +2470,7 @@ class SyclOptReportCreator : public SyclKernelFieldHandler { unsigned KernelArgSize = SemaRef.getASTContext() .getTypeSizeInChars(KernelArgType) .getQuantity(); - SemaRef.getDiagnostics().getSYCLOptReportHandler().AddKernelArgs( + SemaRef.getDiagnostics().getSYCLOptReport().AddKernelArgs( DC.getKernelDecl(), FieldTy.getAsString(), KernelArgType.getAsString(), KernelInvocationLoc, KernelArgSize, getKernelArgDesc( diff --git a/clang/lib/Sema/SemaTemplateInstantiate.cpp b/clang/lib/Sema/SemaTemplateInstantiate.cpp index a3cd7303e6765..d8d6203fe5861 100644 --- 
a/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2911,7 +2911,8 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, if (!Instantiation->isInvalidDecl()) { // Perform any dependent diagnostics from the pattern. - PerformDependentDiagnostics(Pattern, TemplateArgs); + if (Pattern->isDependentContext()) + PerformDependentDiagnostics(Pattern, TemplateArgs); // Instantiate any out-of-line class template partial // specializations now. diff --git a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp index 629d7ccce54b7..a37bd596aa5e9 100644 --- a/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp +++ b/clang/lib/Sema/SemaTemplateInstantiateDecl.cpp @@ -9,6 +9,7 @@ // //===----------------------------------------------------------------------===/ +#include "TreeTransform.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/ASTMutationListener.h" @@ -2129,9 +2130,16 @@ Decl *TemplateDeclInstantiator::VisitCXXRecordDecl(CXXRecordDecl *D) { PrevDecl = cast(Prev); } - CXXRecordDecl *Record = CXXRecordDecl::Create( - SemaRef.Context, D->getTagKind(), Owner, D->getBeginLoc(), - D->getLocation(), D->getIdentifier(), PrevDecl); + CXXRecordDecl *Record = nullptr; + if (D->isLambda()) + Record = CXXRecordDecl::CreateLambda( + SemaRef.Context, Owner, D->getLambdaTypeInfo(), D->getLocation(), + D->isDependentLambda(), D->isGenericLambda(), + D->getLambdaCaptureDefault()); + else + Record = CXXRecordDecl::Create(SemaRef.Context, D->getTagKind(), Owner, + D->getBeginLoc(), D->getLocation(), + D->getIdentifier(), PrevDecl); // Substitute the nested name specifier, if any. 
if (SubstQualifier(D, Record)) @@ -2610,6 +2618,20 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( if (InstantiatedExplicitSpecifier.isInvalid()) return nullptr; + // Implicit destructors/constructors created for local classes in + // DeclareImplicit* (see SemaDeclCXX.cpp) might not have an associated TSI. + // Unfortunately there isn't enough context in those functions to + // conditionally populate the TSI without breaking non-template related use + // cases. Populate TSIs prior to calling SubstFunctionType to make sure we get + // a proper transformation. + if (cast(D->getParent())->isLambda() && + !D->getTypeSourceInfo() && + isa(D)) { + TypeSourceInfo *TSI = + SemaRef.Context.getTrivialTypeSourceInfo(D->getType()); + D->setTypeSourceInfo(TSI); + } + SmallVector Params; TypeSourceInfo *TInfo = SubstFunctionType(D, Params); if (!TInfo) @@ -2699,6 +2721,9 @@ Decl *TemplateDeclInstantiator::VisitCXXMethodDecl( Destructor->isInlineSpecified(), false, Destructor->getConstexprKind(), TrailingRequiresClause); Method->setRangeEnd(Destructor->getEndLoc()); + Method->setDeclName(SemaRef.Context.DeclarationNames.getCXXDestructorName( + SemaRef.Context.getCanonicalType( + SemaRef.Context.getTypeDeclType(Record)))); } else if (CXXConversionDecl *Conversion = dyn_cast(D)) { Method = CXXConversionDecl::Create( SemaRef.Context, Record, StartLoc, NameInfo, T, TInfo, @@ -5223,10 +5248,76 @@ void Sema::InstantiateFunctionDefinition(SourceLocation PointOfInstantiation, Rec->isLocalClass() && !Function->isFunctionTemplateSpecialization(); LocalInstantiationScope Scope(*this, MergeWithParentScope); + auto RebuildTypeSourceInfoForDefaultSpecialMembers = [&]() { + // Special members might get their TypeSourceInfo set up w.r.t the + // PatternDecl context, in which case parameters could still be pointing + // back to the original class, make sure arguments are bound to the + // instantiated record instead. 
+ assert(PatternDecl->isDefaulted() && + "Special member needs to be defaulted"); + auto PatternSM = getDefaultedFunctionKind(PatternDecl).asSpecialMember(); + if (!(PatternSM == Sema::CXXCopyConstructor || + PatternSM == Sema::CXXCopyAssignment || + PatternSM == Sema::CXXMoveConstructor || + PatternSM == Sema::CXXMoveAssignment)) + return; - if (PatternDecl->isDefaulted()) + auto *NewRec = dyn_cast(Function->getDeclContext()); + const auto *PatternRec = + dyn_cast(PatternDecl->getDeclContext()); + if (!NewRec || !PatternRec) + return; + if (!PatternRec->isLambda()) + return; + + struct SpecialMemberTypeInfoRebuilder + : TreeTransform { + using Base = TreeTransform; + const CXXRecordDecl *OldDecl; + CXXRecordDecl *NewDecl; + + SpecialMemberTypeInfoRebuilder(Sema &SemaRef, const CXXRecordDecl *O, + CXXRecordDecl *N) + : TreeTransform(SemaRef), OldDecl(O), NewDecl(N) {} + + bool TransformExceptionSpec(SourceLocation Loc, + FunctionProtoType::ExceptionSpecInfo &ESI, + SmallVectorImpl &Exceptions, + bool &Changed) { + return false; + } + + QualType TransformRecordType(TypeLocBuilder &TLB, RecordTypeLoc TL) { + const RecordType *T = TL.getTypePtr(); + RecordDecl *Record = cast_or_null( + getDerived().TransformDecl(TL.getNameLoc(), T->getDecl())); + if (Record != OldDecl) + return Base::TransformRecordType(TLB, TL); + + QualType Result = getDerived().RebuildRecordType(NewDecl); + if (Result.isNull()) + return QualType(); + + RecordTypeLoc NewTL = TLB.push(Result); + NewTL.setNameLoc(TL.getNameLoc()); + return Result; + } + } IR{*this, PatternRec, NewRec}; + + TypeSourceInfo *NewSI = IR.TransformType(Function->getTypeSourceInfo()); + Function->setType(NewSI->getType()); + Function->setTypeSourceInfo(NewSI); + + ParmVarDecl *Parm = Function->getParamDecl(0); + TypeSourceInfo *NewParmSI = IR.TransformType(Parm->getTypeSourceInfo()); + Parm->setType(NewParmSI->getType()); + Parm->setTypeSourceInfo(NewParmSI); + }; + + if (PatternDecl->isDefaulted()) { + 
RebuildTypeSourceInfoForDefaultSpecialMembers(); SetDeclDefaulted(Function, PatternDecl->getLocation()); - else { + } else { MultiLevelTemplateArgumentList TemplateArgs = getTemplateInstantiationArgs(Function, nullptr, false, PatternDecl); diff --git a/clang/lib/Sema/SemaType.cpp b/clang/lib/Sema/SemaType.cpp index 8cc000115ba86..f81e9948e66d1 100644 --- a/clang/lib/Sema/SemaType.cpp +++ b/clang/lib/Sema/SemaType.cpp @@ -1730,11 +1730,25 @@ static QualType ConvertDeclSpecToType(TypeProcessingState &state) { if (Result->containsErrors()) declarator.setInvalidType(); - if (S.getLangOpts().OpenCL && Result->isOCLImage3dWOType() && - !S.getOpenCLOptions().isSupported("cl_khr_3d_image_writes", S.getLangOpts())) { - S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension) - << 0 << Result << "cl_khr_3d_image_writes"; - declarator.setInvalidType(); + if (S.getLangOpts().OpenCL) { + const auto &OpenCLOptions = S.getOpenCLOptions(); + StringRef OptName; + // OpenCL C v3.0 s6.3.3 - OpenCL image types require __opencl_c_images + // support + if ((Result->isImageType() || Result->isSamplerT()) && + (S.getLangOpts().OpenCLVersion >= 300 && + !OpenCLOptions.isSupported("__opencl_c_images", S.getLangOpts()))) + OptName = "__opencl_c_images"; + else if (Result->isOCLImage3dWOType() && + !OpenCLOptions.isSupported("cl_khr_3d_image_writes", + S.getLangOpts())) + OptName = "cl_khr_3d_image_writes"; + + if (!OptName.empty()) { + S.Diag(DS.getTypeSpecTypeLoc(), diag::err_opencl_requires_extension) + << 0 << Result << OptName; + declarator.setInvalidType(); + } } bool IsFixedPointType = DS.getTypeSpecType() == DeclSpec::TST_accum || diff --git a/clang/lib/Sema/TreeTransform.h b/clang/lib/Sema/TreeTransform.h index dd48d30301c37..3f3284d76cd85 100644 --- a/clang/lib/Sema/TreeTransform.h +++ b/clang/lib/Sema/TreeTransform.h @@ -5858,8 +5858,8 @@ TreeTransform::TransformFunctionProtoType(TypeLocBuilder &TLB, return getDerived().TransformFunctionProtoType( TLB, TL, nullptr, 
Qualifiers(), [&](FunctionProtoType::ExceptionSpecInfo &ESI, bool &Changed) { - return This->TransformExceptionSpec(TL.getBeginLoc(), ESI, - ExceptionStorage, Changed); + return This->getDerived().TransformExceptionSpec( + TL.getBeginLoc(), ESI, ExceptionStorage, Changed); }); } diff --git a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp index 4e49dbc2facce..69b90be9aa7e6 100644 --- a/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/CStringChecker.cpp @@ -2039,7 +2039,7 @@ void CStringChecker::evalStrcmpCommon(CheckerContext &C, const CallExpr *CE, RightStrRef = RightStrRef.substr(0, s2Term); // Use StringRef's comparison methods to compute the actual result. - int compareRes = IgnoreCase ? LeftStrRef.compare_lower(RightStrRef) + int compareRes = IgnoreCase ? LeftStrRef.compare_insensitive(RightStrRef) : LeftStrRef.compare(RightStrRef); // The strcmp function returns an integer greater than, equal to, or less diff --git a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp index 73c6517fd0ebf..1a7f0d5ab74c2 100644 --- a/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/ContainerModeling.cpp @@ -763,14 +763,14 @@ bool isBeginCall(const FunctionDecl *Func) { const auto *IdInfo = Func->getIdentifier(); if (!IdInfo) return false; - return IdInfo->getName().endswith_lower("begin"); + return IdInfo->getName().endswith_insensitive("begin"); } bool isEndCall(const FunctionDecl *Func) { const auto *IdInfo = Func->getIdentifier(); if (!IdInfo) return false; - return IdInfo->getName().endswith_lower("end"); + return IdInfo->getName().endswith_insensitive("end"); } const CXXRecordDecl *getCXXRecordDecl(ProgramStateRef State, diff --git a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp index 
63fbe75fd4983..8e02ef74c6686 100644 --- a/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/GCDAntipatternChecker.cpp @@ -80,7 +80,7 @@ static bool isTest(const Decl *D) { if (const auto *CD = dyn_cast(OD->getParent())) { std::string ContainerName = CD->getNameAsString(); StringRef CN(ContainerName); - if (CN.contains_lower("test") || CN.contains_lower("mock")) + if (CN.contains_insensitive("test") || CN.contains_insensitive("mock")) return true; } } diff --git a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp index ac0f24603dd90..4961901499914 100644 --- a/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/Iterator.cpp @@ -29,8 +29,8 @@ bool isIterator(const CXXRecordDecl *CRD) { return false; const auto Name = CRD->getName(); - if (!(Name.endswith_lower("iterator") || Name.endswith_lower("iter") || - Name.endswith_lower("it"))) + if (!(Name.endswith_insensitive("iterator") || + Name.endswith_insensitive("iter") || Name.endswith_insensitive("it"))) return false; bool HasCopyCtor = false, HasCopyAssign = true, HasDtor = false, diff --git a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp index e0f0dc35e7a71..a6470da09c458 100644 --- a/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/MallocChecker.cpp @@ -3145,9 +3145,10 @@ static SymbolRef findFailedReallocSymbol(ProgramStateRef currState, static bool isReferenceCountingPointerDestructor(const CXXDestructorDecl *DD) { if (const IdentifierInfo *II = DD->getParent()->getIdentifier()) { StringRef N = II->getName(); - if (N.contains_lower("ptr") || N.contains_lower("pointer")) { - if (N.contains_lower("ref") || N.contains_lower("cnt") || - N.contains_lower("intrusive") || N.contains_lower("shared")) { + if (N.contains_insensitive("ptr") || N.contains_insensitive("pointer")) { + if 
(N.contains_insensitive("ref") || N.contains_insensitive("cnt") || + N.contains_insensitive("intrusive") || + N.contains_insensitive("shared")) { return true; } } diff --git a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp index 6b176b3c4e2b2..75db1e195a432 100644 --- a/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp +++ b/clang/lib/StaticAnalyzer/Checkers/StreamChecker.cpp @@ -306,7 +306,8 @@ class StreamChecker : public CheckerStreamArgNo), C, State); if (!State) return; @@ -549,7 +551,8 @@ void StreamChecker::preFread(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -573,7 +576,8 @@ void StreamChecker::preFwrite(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -671,7 +675,8 @@ void StreamChecker::preFseek(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -790,7 +795,8 @@ void StreamChecker::preDefault(const FnDescription *Desc, const CallEvent &Call, CheckerContext &C) const { ProgramStateRef State = C.getState(); SVal StreamVal = getStreamArg(Desc, 
Call); - State = ensureStreamNonNull(StreamVal, C, State); + State = ensureStreamNonNull(StreamVal, Call.getArgExpr(Desc->StreamArgNo), C, + State); if (!State) return; State = ensureStreamOpened(StreamVal, C, State); @@ -814,7 +820,8 @@ void StreamChecker::evalSetFeofFerror(const FnDescription *Desc, } ProgramStateRef -StreamChecker::ensureStreamNonNull(SVal StreamVal, CheckerContext &C, +StreamChecker::ensureStreamNonNull(SVal StreamVal, const Expr *StreamE, + CheckerContext &C, ProgramStateRef State) const { auto Stream = StreamVal.getAs(); if (!Stream) @@ -827,8 +834,11 @@ StreamChecker::ensureStreamNonNull(SVal StreamVal, CheckerContext &C, if (!StateNotNull && StateNull) { if (ExplodedNode *N = C.generateErrorNode(StateNull)) { - C.emitReport(std::make_unique( - BT_FileNull, "Stream pointer might be NULL.", N)); + auto R = std::make_unique( + BT_FileNull, "Stream pointer might be NULL.", N); + if (StreamE) + bugreporter::trackExpressionValue(N, StreamE, *R); + C.emitReport(std::move(R)); } return nullptr; } diff --git a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp index ecf1d1b5f0688..3785f498414f9 100644 --- a/clang/lib/StaticAnalyzer/Core/CallEvent.cpp +++ b/clang/lib/StaticAnalyzer/Core/CallEvent.cpp @@ -47,6 +47,7 @@ #include "clang/StaticAnalyzer/Core/PathSensitive/Store.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/ImmutableList.h" #include "llvm/ADT/None.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" @@ -466,6 +467,42 @@ bool CallEvent::isVariadic(const Decl *D) { llvm_unreachable("unknown callable kind"); } +static bool isTransparentUnion(QualType T) { + const RecordType *UT = T->getAsUnionType(); + return UT && UT->getDecl()->hasAttr(); +} + +// In some cases, symbolic cases should be transformed before we associate +// them with parameters. This function encapsulates such cases.
+static SVal processArgument(SVal Value, const Expr *ArgumentExpr, + const ParmVarDecl *Parameter, SValBuilder &SVB) { + QualType ParamType = Parameter->getType(); + QualType ArgumentType = ArgumentExpr->getType(); + + // Transparent unions allow users to easily convert values of union field + // types into union-typed objects. + // + // Also, more importantly, they allow users to define functions with different + // parameter types, substituting types matching transparent union + // field types with the union type itself. + // + // Here, we check specifically for the latter cases and prevent binding + // field-typed values to union-typed regions. + if (isTransparentUnion(ParamType) && + // Let's check that we are indeed trying to bind different types. + !isTransparentUnion(ArgumentType)) { + BasicValueFactory &BVF = SVB.getBasicValueFactory(); + + llvm::ImmutableList CompoundSVals = BVF.getEmptySValList(); + CompoundSVals = BVF.prependSVal(Value, CompoundSVals); + + // Wrap it with compound value. + return SVB.makeCompoundVal(ParamType, CompoundSVals); + } + + return Value; +} + static void addParameterValuesToBindings(const StackFrameContext *CalleeCtx, CallEvent::BindingsTy &Bindings, SValBuilder &SVB, @@ -490,10 +527,12 @@ static void addParameterValuesToBindings(const StackFrameContext *CalleeCtx, // determined in compile-time but not represented as arg-expressions, // which makes getArgSVal() fail and return UnknownVal.
SVal ArgVal = Call.getArgSVal(Idx); + const Expr *ArgExpr = Call.getArgExpr(Idx); if (!ArgVal.isUnknown()) { Loc ParamLoc = SVB.makeLoc( MRMgr.getParamVarRegion(Call.getOriginExpr(), Idx, CalleeCtx)); - Bindings.push_back(std::make_pair(ParamLoc, ArgVal)); + Bindings.push_back( + std::make_pair(ParamLoc, processArgument(ArgVal, ArgExpr, *I, SVB))); } } diff --git a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp index ac7767f0d3c4d..6d17bcb8b87f0 100644 --- a/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangeConstraintManager.cpp @@ -1384,12 +1384,6 @@ RangeSet SymbolicRangeInferrer::VisitBinaryOperator(Range LHS, // Constraint manager implementation details //===----------------------------------------------------------------------===// -static SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) { - SValBuilder &SVB = State->getStateManager().getSValBuilder(); - SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym)); - return SimplifiedVal.getAsSymbol(); -} - class RangeConstraintManager : public RangedConstraintManager { public: RangeConstraintManager(ExprEngine *EE, SValBuilder &SVB) @@ -1503,9 +1497,6 @@ class RangeConstraintManager : public RangedConstraintManager { // This is an infeasible assumption. 
return nullptr; - if (SymbolRef SimplifiedSym = simplify(State, Sym)) - Sym = SimplifiedSym; - if (ProgramStateRef NewState = setConstraint(State, Sym, NewConstraint)) { if (auto Equality = EqualityInfo::extract(Sym, Int, Adjustment)) { // If the original assumption is not Sym + Adjustment !=/ Int, @@ -1962,7 +1953,7 @@ LLVM_NODISCARD ProgramStateRef EquivalenceClass::simplify( SValBuilder &SVB, RangeSet::Factory &F, ProgramStateRef State) { SymbolSet ClassMembers = getClassMembers(State); for (const SymbolRef &MemberSym : ClassMembers) { - SymbolRef SimplifiedMemberSym = ::simplify(State, MemberSym); + SymbolRef SimplifiedMemberSym = ento::simplify(State, MemberSym); if (SimplifiedMemberSym && MemberSym != SimplifiedMemberSym) { EquivalenceClass ClassOfSimplifiedSym = EquivalenceClass::find(State, SimplifiedMemberSym); @@ -2288,7 +2279,6 @@ RangeConstraintManager::assumeSymNE(ProgramStateRef St, SymbolRef Sym, return St; llvm::APSInt Point = AdjustmentType.convert(Int) - Adjustment; - RangeSet New = getRange(St, Sym); New = F.deletePoint(New, Point); diff --git a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp index 1b8945fb66af0..d227c025fb203 100644 --- a/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp +++ b/clang/lib/StaticAnalyzer/Core/RangedConstraintManager.cpp @@ -23,12 +23,14 @@ RangedConstraintManager::~RangedConstraintManager() {} ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, SymbolRef Sym, bool Assumption) { + Sym = simplify(State, Sym); + // Handle SymbolData. - if (isa(Sym)) { + if (isa(Sym)) return assumeSymUnsupported(State, Sym, Assumption); - // Handle symbolic expression. - } else if (const SymIntExpr *SIE = dyn_cast(Sym)) { + // Handle symbolic expression. + if (const SymIntExpr *SIE = dyn_cast(Sym)) { // We can only simplify expressions whose RHS is an integer. 
BinaryOperator::Opcode op = SIE->getOpcode(); @@ -93,6 +95,9 @@ ProgramStateRef RangedConstraintManager::assumeSym(ProgramStateRef State, ProgramStateRef RangedConstraintManager::assumeSymInclusiveRange( ProgramStateRef State, SymbolRef Sym, const llvm::APSInt &From, const llvm::APSInt &To, bool InRange) { + + Sym = simplify(State, Sym); + // Get the type used for calculating wraparound. BasicValueFactory &BVF = getBasicVals(); APSIntType WraparoundType = BVF.getAPSIntType(Sym->getType()); @@ -121,6 +126,8 @@ ProgramStateRef RangedConstraintManager::assumeSymInclusiveRange( ProgramStateRef RangedConstraintManager::assumeSymUnsupported(ProgramStateRef State, SymbolRef Sym, bool Assumption) { + Sym = simplify(State, Sym); + BasicValueFactory &BVF = getBasicVals(); QualType T = Sym->getType(); @@ -219,5 +226,13 @@ void RangedConstraintManager::computeAdjustment(SymbolRef &Sym, } } +SymbolRef simplify(ProgramStateRef State, SymbolRef Sym) { + SValBuilder &SVB = State->getStateManager().getSValBuilder(); + SVal SimplifiedVal = SVB.simplifySVal(State, SVB.makeSymbolVal(Sym)); + if (SymbolRef SimplifiedSym = SimplifiedVal.getAsSymbol()) + return SimplifiedSym; + return Sym; +} + } // end of namespace ento } // end of namespace clang diff --git a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp index d1f984632660f..fbceb26c39c7c 100644 --- a/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp +++ b/clang/lib/Tooling/Inclusions/HeaderIncludes.cpp @@ -252,9 +252,9 @@ bool IncludeCategoryManager::isMainHeader(StringRef IncludeName) const { // 1) foo.h => bar.cc // 2) foo.proto.h => foo.cc StringRef Matching; - if (MatchingFileStem.startswith_lower(HeaderStem)) + if (MatchingFileStem.startswith_insensitive(HeaderStem)) Matching = MatchingFileStem; // example 1), 2) - else if (FileStem.equals_lower(HeaderStem)) + else if (FileStem.equals_insensitive(HeaderStem)) Matching = FileStem; // example 3) if (!Matching.empty()) { llvm::Regex 
MainIncludeRegex(HeaderStem.str() + Style.IncludeIsMainRegex, diff --git a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp index 3b65504b98ea3..650e510fb68ff 100644 --- a/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp +++ b/clang/lib/Tooling/InterpolatingCompilationDatabase.cpp @@ -256,7 +256,7 @@ struct TransferableCommand { // Otherwise just check the clang executable file name. return !CmdLine.empty() && - llvm::sys::path::stem(CmdLine.front()).endswith_lower("cl"); + llvm::sys::path::stem(CmdLine.front()).endswith_insensitive("cl"); } // Map the language from the --std flag to that of the -x flag. diff --git a/clang/test/Analysis/diagnostics/PR46264.cpp b/clang/test/Analysis/diagnostics/PR46264.cpp new file mode 100644 index 0000000000000..466dada694be9 --- /dev/null +++ b/clang/test/Analysis/diagnostics/PR46264.cpp @@ -0,0 +1,36 @@ +// RUN: %clang_analyze_cc1 -analyzer-checker=core -analyzer-output=text -verify %s + +// PR46264 +// This case shall not crash with an assertion failure about void* dereferencing. +// The crash was last seen on commit +// `3ed8ebc2f6b8172bed48cc5986d3b7af4cfca1bc` from 24.05.2020. +namespace ns1 { +namespace a { +class b { +public: + typedef int b::*c; + operator c() { return d ? &b::d : 0; } + // expected-note@-1{{'?'
condition is true}} + // expected-note@-2{{Assuming field 'd' is not equal to 0}} + // expected-note@-3{{Returning value, which participates in a condition later}} + int d; +}; +} // namespace a +using a::b; +class e { + void f(); + void g(); + b h; +}; +void e::f() { + e *i; + // expected-note@-1{{'i' declared without an initial value}} + if (h) + // expected-note@-1{{Taking true branch}} + // expected-note@-2{{'b::operator int ns1::a::b::*'}} + // expected-note@-3{{Returning from 'b::operator int ns1::a::b::*'}} + i->g(); + // expected-note@-1{{Called C++ object pointer is uninitialized}} + // expected-warning@-2{{Called C++ object pointer is uninitialized}} +} +} // namespace ns1 diff --git a/clang/test/Analysis/solver-sym-simplification-no-crash.c b/clang/test/Analysis/solver-sym-simplification-no-crash.c new file mode 100644 index 0000000000000..f90fad07f0c30 --- /dev/null +++ b/clang/test/Analysis/solver-sym-simplification-no-crash.c @@ -0,0 +1,26 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -verify + +// Here, we test that symbol simplification in the solver does not produce any +// crashes. + +// expected-no-diagnostics + +static int a, b; +static long c; + +static void f(int i, int j) +{ + (void)(j <= 0 && i ? 
i : j); +} + +static void g(void) +{ + int d = a - b | (c < 0); + for (;;) + { + f(d ^ c, c); + } +} diff --git a/clang/test/Analysis/solver-sym-simplification-with-proper-range-type.c b/clang/test/Analysis/solver-sym-simplification-with-proper-range-type.c new file mode 100644 index 0000000000000..248742e96b6b4 --- /dev/null +++ b/clang/test/Analysis/solver-sym-simplification-with-proper-range-type.c @@ -0,0 +1,29 @@ +// RUN: %clang_analyze_cc1 %s \ +// RUN: -analyzer-checker=core \ +// RUN: -analyzer-checker=debug.ExprInspection \ +// RUN: -verify + +// Here we test that the range based solver equivalency tracking mechanism +// assigns a properly typed range to the simplified symbol. + +void clang_analyzer_printState(); +void clang_analyzer_eval(int); + +void f(int a0, int b0, int c) +{ + int a1 = a0 - b0; + int b1 = (unsigned)a1 + c; + if (c == 0) { + + int d = 7L / b1; // ... + // At this point b1 is considered non-zero, which results in a new + // constraint for $a0 - $b0 + $c. The type of this sym is unsigned, + // however, the simplified sym is $a0 - $b0 and its type is signed. + // This is probably the result of the inherent improper handling of + // casts. Anyway, Range assignment for constraints use this type + // information. Therefore, we must make sure that first we simplify the + // symbol and only then we assign the range. 
+ + clang_analyzer_eval(a0 - b0 != 0); // expected-warning{{TRUE}} + } +} diff --git a/clang/test/Analysis/stream-note.c b/clang/test/Analysis/stream-note.c index 71a5ba2315d9c..a509bb1b58315 100644 --- a/clang/test/Analysis/stream-note.c +++ b/clang/test/Analysis/stream-note.c @@ -77,3 +77,14 @@ void check_note_leak_2(int c) { fclose(F1); fclose(F2); } + +void check_track_null() { + FILE *F; + F = fopen("foo1.c", "r"); // expected-note {{Value assigned to 'F'}} expected-note {{Assuming pointer value is null}} + if (F != NULL) { // expected-note {{Taking false branch}} expected-note {{'F' is equal to NULL}} + fclose(F); + return; + } + fclose(F); // expected-warning {{Stream pointer might be NULL}} + // expected-note@-1 {{Stream pointer might be NULL}} +} diff --git a/clang/test/Analysis/transparent_union_bug.c b/clang/test/Analysis/transparent_union_bug.c new file mode 100644 index 0000000000000..b6069c6a59b19 --- /dev/null +++ b/clang/test/Analysis/transparent_union_bug.c @@ -0,0 +1,40 @@ +// RUN: %clang_analyze_cc1 -analyze -triple x86_64-apple-darwin10 \ +// RUN: -analyzer-checker=core,debug.ExprInspection -verify %s + +void clang_analyzer_warnIfReached(); + +typedef struct { + int value; +} Struct; + +typedef union { + Struct *ptr; + long num; +} __attribute__((transparent_union)) Alias; + +void foo(Struct *x); +void foo(Alias y) { + if (y.ptr == 0) { + // no-crash + } + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} +} +void foobar(long z); +void foobar(Alias z) { + if (z.num != 42) { + // no-crash + } + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} +} + +void foobaz(Alias x) { + if (x.ptr == 0) { + // no-crash + } + clang_analyzer_warnIfReached(); // expected-warning{{REACHABLE}} +} +void bar(Struct arg) { + foo(&arg); + foobar(42); + foobaz(&arg); +} diff --git a/clang/test/CXX/class/class.compare/class.spaceship/p2.cpp b/clang/test/CXX/class/class.compare/class.spaceship/p2.cpp index 01fff692c0c85..61d03e0723dc9 100644 
--- a/clang/test/CXX/class/class.compare/class.spaceship/p2.cpp +++ b/clang/test/CXX/class/class.compare/class.spaceship/p2.cpp @@ -159,7 +159,7 @@ namespace BadDeducedType { namespace PR48856 { struct A { auto operator<=>(const A &) const = default; // expected-warning {{implicitly deleted}} - void (*x)(); // expected-note {{does not support relational comparisons}} + void (*x)(); // expected-note {{because there is no viable three-way comparison function for member 'x'}} }; struct B { @@ -192,12 +192,23 @@ namespace PR50591 { }; std::partial_ordering cmp_b2 = b2() <=> b2(); + using fp = void (*)(); + struct a3 { - using fp = void (*)(); operator fp() const; }; struct b3 { auto operator<=>(b3 const &) const = default; // expected-warning {{implicitly deleted}} - a3 f; // expected-note {{would compare member 'f' as 'void (*)()', which does not support relational comparisons}} + a3 f; // expected-note {{because there is no viable three-way comparison function}} + }; + + struct a4 { // Test that function pointer conversion operator here is ignored for this overload resolution. 
+ operator int() const; + operator fp() const; + }; + struct b4 { + auto operator<=>(b4 const &) const = default; + a4 f; }; + std::strong_ordering cmp_b4 = b4() <=> b4(); } diff --git a/clang/test/CXX/drs/dr15xx.cpp b/clang/test/CXX/drs/dr15xx.cpp index 8bfa29a8b6676..e4f1105ceebe0 100644 --- a/clang/test/CXX/drs/dr15xx.cpp +++ b/clang/test/CXX/drs/dr15xx.cpp @@ -1,7 +1,7 @@ -// RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors -// RUN: %clang_cc1 -std=c++1z -triple x86_64-unknown-unknown %s -verify -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++98 -triple x86_64-unknown-unknown %s -verify=expected -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++11 -triple x86_64-unknown-unknown %s -verify=expected -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++14 -triple x86_64-unknown-unknown %s -verify=expected,cxx14_17 -fexceptions -fcxx-exceptions -pedantic-errors +// RUN: %clang_cc1 -std=c++17 -triple x86_64-unknown-unknown %s -verify=expected,cxx17 -fexceptions -fcxx-exceptions -pedantic-errors namespace dr1512 { // dr1512: 4 void f(char *p) { @@ -28,10 +28,10 @@ namespace dr1512 { // dr1512: 4 template void composite_pointer_type_is_ord() { composite_pointer_type_is_base(); - typedef __typeof(val() < val()) cmp; - typedef __typeof(val() <= val()) cmp; - typedef __typeof(val() > val()) cmp; - typedef __typeof(val() >= val()) cmp; + typedef __typeof(val() < val()) cmp; // cxx17-warning 2 {{ordered comparison of function pointers}} + typedef __typeof(val() <= val()) cmp; // cxx17-warning 2 {{ordered comparison of function pointers}} + typedef __typeof(val() > val()) cmp; // cxx17-warning 2 
{{ordered comparison of function pointers}} + typedef __typeof(val() >= val()) cmp; // cxx17-warning 2 {{ordered comparison of function pointers}} typedef bool cmp; } @@ -79,8 +79,8 @@ namespace dr1512 { // dr1512: 4 no_composite_pointer_type(); #if __cplusplus > 201402 - composite_pointer_type_is_ord(); - composite_pointer_type_is_ord(); + composite_pointer_type_is_ord(); // expected-note {{requested here}} + composite_pointer_type_is_ord(); // expected-note {{requested here}} composite_pointer_type_is_unord(); composite_pointer_type_is_unord(); // FIXME: This looks like a standard defect; these should probably all have type 'int (B::*)()'. diff --git a/clang/test/CXX/drs/dr3xx.cpp b/clang/test/CXX/drs/dr3xx.cpp index 5c4c75595817c..9c8fe2de2f598 100644 --- a/clang/test/CXX/drs/dr3xx.cpp +++ b/clang/test/CXX/drs/dr3xx.cpp @@ -18,9 +18,9 @@ namespace dr301 { // dr301: yes void operator-(S, S); void f() { - bool a = (void(*)(S, S))operator+ < + bool a = (void(*)(S, S))operator+ < // expected-warning {{ordered comparison of function pointers}} (void(*)(S, S))operator+; - bool b = (void(*)(S, S))operator- < // cxx20_2b-note {{to match this '<'}} + bool b = (void(*)(S, S))operator- < // cxx20_2b-note {{to match this '<'}} cxx98_17-warning {{ordered comparison of function pointers}} (void(*)(S, S))operator-; // cxx20_2b-error {{expected '>'}} bool c = (void(*)(S, S))operator+ < // expected-note {{to match this '<'}} (void(*)(S, S))operator-; // expected-error {{expected '>'}} diff --git a/clang/test/CXX/expr/expr.const/p2-0x.cpp b/clang/test/CXX/expr/expr.const/p2-0x.cpp index 82c92b77bd173..81efbb0a47271 100644 --- a/clang/test/CXX/expr/expr.const/p2-0x.cpp +++ b/clang/test/CXX/expr/expr.const/p2-0x.cpp @@ -515,6 +515,7 @@ namespace UnspecifiedRelations { constexpr void (*pf)() = &f, (*pg)() = &g; constexpr bool u13 = pf < pg; // expected-error {{constant expression}} expected-note {{comparison has unspecified value}} + // expected-warning@-1 {{ordered comparison of 
function pointers}} constexpr bool u14 = pf == pg; // If two pointers point to non-static data members of the same object with diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c new file mode 100644 index 0000000000000..ac287ff294019 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vget.c @@ -0,0 +1,546 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vget_v_i8m2_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv16i8( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m1_t test_vget_v_i8m2_i8m1(vint8m2_t src) { + return vget_v_i8m2_i8m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i8m4_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv32i8( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m1_t test_vget_v_i8m4_i8m1(vint8m4_t src) { + return vget_v_i8m4_i8m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_i8m4_i8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i8.nxv32i8( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m2_t test_vget_v_i8m4_i8m2(vint8m4_t src) { + return vget_v_i8m4_i8m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i8m8_i8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv64i8( [[SRC:%.*]], i64 48) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m1_t test_vget_v_i8m8_i8m1(vint8m8_t src) { + return vget_v_i8m8_i8m1(src, 6); +} + +// 
CHECK-RV64-LABEL: @test_vget_v_i8m8_i8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i8.nxv64i8( [[SRC:%.*]], i64 48) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m2_t test_vget_v_i8m8_i8m2(vint8m8_t src) { + return vget_v_i8m8_i8m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_i8m8_i8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv32i8.nxv64i8( [[SRC:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m4_t test_vget_v_i8m8_i8m4(vint8m8_t src) { + return vget_v_i8m8_i8m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m2_i16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv8i16( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m1_t test_vget_v_i16m2_i16m1(vint16m2_t src) { + return vget_v_i16m2_i16m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m4_i16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv16i16( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m1_t test_vget_v_i16m4_i16m1(vint16m4_t src) { + return vget_v_i16m4_i16m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m4_i16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i16.nxv16i16( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m2_t test_vget_v_i16m4_i16m2(vint16m4_t src) { + return vget_v_i16m4_i16m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m8_i16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv32i16( [[SRC:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m1_t test_vget_v_i16m8_i16m1(vint16m8_t src) { + return vget_v_i16m8_i16m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m8_i16m2( +// CHECK-RV64-NEXT: entry: +// 
CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i16.nxv32i16( [[SRC:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m2_t test_vget_v_i16m8_i16m2(vint16m8_t src) { + return vget_v_i16m8_i16m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_i16m8_i16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i16.nxv32i16( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m4_t test_vget_v_i16m8_i16m4(vint16m8_t src) { + return vget_v_i16m8_i16m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m2_i32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv4i32( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m1_t test_vget_v_i32m2_i32m1(vint32m2_t src) { + return vget_v_i32m2_i32m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m4_i32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv8i32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m1_t test_vget_v_i32m4_i32m1(vint32m4_t src) { + return vget_v_i32m4_i32m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m4_i32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i32.nxv8i32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m2_t test_vget_v_i32m4_i32m2(vint32m4_t src) { + return vget_v_i32m4_i32m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m8_i32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv16i32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m1_t test_vget_v_i32m8_i32m1(vint32m8_t src) { + return vget_v_i32m8_i32m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m8_i32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.extract.nxv4i32.nxv16i32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m2_t test_vget_v_i32m8_i32m2(vint32m8_t src) { + return vget_v_i32m8_i32m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_i32m8_i32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i32.nxv16i32( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m4_t test_vget_v_i32m8_i32m4(vint32m8_t src) { + return vget_v_i32m8_i32m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m2_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv2i64( [[SRC:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vget_v_i64m2_i64m1(vint64m2_t src) { + return vget_v_i64m2_i64m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m4_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv4i64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vget_v_i64m4_i64m1(vint64m4_t src) { + return vget_v_i64m4_i64m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m4_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i64.nxv4i64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vget_v_i64m4_i64m2(vint64m4_t src) { + return vget_v_i64m4_i64m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m8_i64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv8i64( [[SRC:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m1_t test_vget_v_i64m8_i64m1(vint64m8_t src) { + return vget_v_i64m8_i64m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m8_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i64.nxv8i64( [[SRC:%.*]], 
i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vget_v_i64m8_i64m2(vint64m8_t src) { + return vget_v_i64m8_i64m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_i64m8_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i64.nxv8i64( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vget_v_i64m8_i64m4(vint64m8_t src) { + return vget_v_i64m8_i64m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m2_u8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv16i8( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m1_t test_vget_v_u8m2_u8m1(vuint8m2_t src) { + return vget_v_u8m2_u8m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m4_u8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv32i8( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m1_t test_vget_v_u8m4_u8m1(vuint8m4_t src) { + return vget_v_u8m4_u8m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m4_u8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i8.nxv32i8( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m2_t test_vget_v_u8m4_u8m2(vuint8m4_t src) { + return vget_v_u8m4_u8m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m8_u8m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i8.nxv64i8( [[SRC:%.*]], i64 48) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m1_t test_vget_v_u8m8_u8m1(vuint8m8_t src) { + return vget_v_u8m8_u8m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m8_u8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i8.nxv64i8( [[SRC:%.*]], i64 48) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m2_t test_vget_v_u8m8_u8m2(vuint8m8_t 
src) { + return vget_v_u8m8_u8m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_u8m8_u8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv32i8.nxv64i8( [[SRC:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m4_t test_vget_v_u8m8_u8m4(vuint8m8_t src) { + return vget_v_u8m8_u8m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u16m2_u16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv8i16( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m1_t test_vget_v_u16m2_u16m1(vuint16m2_t src) { + return vget_v_u16m2_u16m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u16m4_u16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv16i16( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m1_t test_vget_v_u16m4_u16m1(vuint16m4_t src) { + return vget_v_u16m4_u16m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_u16m4_u16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i16.nxv16i16( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m2_t test_vget_v_u16m4_u16m2(vuint16m4_t src) { + return vget_v_u16m4_u16m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u16m8_u16m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i16.nxv32i16( [[SRC:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m1_t test_vget_v_u16m8_u16m1(vuint16m8_t src) { + return vget_v_u16m8_u16m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_u16m8_u16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i16.nxv32i16( [[SRC:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m2_t test_vget_v_u16m8_u16m2(vuint16m8_t src) { + return vget_v_u16m8_u16m2(src, 3); +} + +// 
CHECK-RV64-LABEL: @test_vget_v_u16m8_u16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv16i16.nxv32i16( [[SRC:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m4_t test_vget_v_u16m8_u16m4(vuint16m8_t src) { + return vget_v_u16m8_u16m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m2_u32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv4i32( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m1_t test_vget_v_u32m2_u32m1(vuint32m2_t src) { + return vget_v_u32m2_u32m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m4_u32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv8i32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m1_t test_vget_v_u32m4_u32m1(vuint32m4_t src) { + return vget_v_u32m4_u32m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m4_u32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i32.nxv8i32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m2_t test_vget_v_u32m4_u32m2(vuint32m4_t src) { + return vget_v_u32m4_u32m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m8_u32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i32.nxv16i32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m1_t test_vget_v_u32m8_u32m1(vuint32m8_t src) { + return vget_v_u32m8_u32m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m8_u32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i32.nxv16i32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m2_t test_vget_v_u32m8_u32m2(vuint32m8_t src) { + return vget_v_u32m8_u32m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_u32m8_u32m4( +// 
CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv8i32.nxv16i32( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m4_t test_vget_v_u32m8_u32m4(vuint32m8_t src) { + return vget_v_u32m8_u32m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m2_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv2i64( [[SRC:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vget_v_u64m2_u64m1(vuint64m2_t src) { + return vget_v_u64m2_u64m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m4_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv4i64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vget_v_u64m4_u64m1(vuint64m4_t src) { + return vget_v_u64m4_u64m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m4_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i64.nxv4i64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vget_v_u64m4_u64m2(vuint64m4_t src) { + return vget_v_u64m4_u64m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m8_u64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1i64.nxv8i64( [[SRC:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m1_t test_vget_v_u64m8_u64m1(vuint64m8_t src) { + return vget_v_u64m8_u64m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m8_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2i64.nxv8i64( [[SRC:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vget_v_u64m8_u64m2(vuint64m8_t src) { + return vget_v_u64m8_u64m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_u64m8_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: 
[[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4i64.nxv8i64( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vget_v_u64m8_u64m4(vuint64m8_t src) { + return vget_v_u64m8_u64m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m2_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2f32.nxv4f32( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vget_v_f32m2_f32m1(vfloat32m2_t src) { + return vget_v_f32m2_f32m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m4_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2f32.nxv8f32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vget_v_f32m4_f32m1(vfloat32m4_t src) { + return vget_v_f32m4_f32m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m4_f32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4f32.nxv8f32( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m2_t test_vget_v_f32m4_f32m2(vfloat32m4_t src) { + return vget_v_f32m4_f32m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m8_f32m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2f32.nxv16f32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m1_t test_vget_v_f32m8_f32m1(vfloat32m8_t src) { + return vget_v_f32m8_f32m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m8_f32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv4f32.nxv16f32( [[SRC:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m2_t test_vget_v_f32m8_f32m2(vfloat32m8_t src) { + return vget_v_f32m8_f32m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_f32m8_f32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.extract.nxv8f32.nxv16f32( [[SRC:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m4_t test_vget_v_f32m8_f32m4(vfloat32m8_t src) { + return vget_v_f32m8_f32m4(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m2_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1f64.nxv2f64( [[SRC:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vget_v_f64m2_f64m1(vfloat64m2_t src) { + return vget_v_f64m2_f64m1(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m4_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1f64.nxv4f64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vget_v_f64m4_f64m1(vfloat64m4_t src) { + return vget_v_f64m4_f64m1(src, 2); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m4_f64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2f64.nxv4f64( [[SRC:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m2_t test_vget_v_f64m4_f64m2(vfloat64m4_t src) { + return vget_v_f64m4_f64m2(src, 1); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m8_f64m1( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv1f64.nxv8f64( [[SRC:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m1_t test_vget_v_f64m8_f64m1(vfloat64m8_t src) { + return vget_v_f64m8_f64m1(src, 6); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m8_f64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.extract.nxv2f64.nxv8f64( [[SRC:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m2_t test_vget_v_f64m8_f64m2(vfloat64m8_t src) { + return vget_v_f64m8_f64m2(src, 3); +} + +// CHECK-RV64-LABEL: @test_vget_v_f64m8_f64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.extract.nxv4f64.nxv8f64( [[SRC:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m4_t test_vget_v_f64m8_f64m4(vfloat64m8_t src) { + return vget_v_f64m8_f64m4(src, 1); +} diff --git a/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c new file mode 100644 index 0000000000000..95da8010aeb26 --- /dev/null +++ b/clang/test/CodeGen/RISCV/rvv-intrinsics/vset.c @@ -0,0 +1,546 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: riscv-registered-target +// RUN: %clang_cc1 -triple riscv64 -target-feature +f -target-feature +d -target-feature +experimental-v \ +// RUN: -disable-O0-optnone -emit-llvm %s -o - | opt -S -mem2reg | FileCheck --check-prefix=CHECK-RV64 %s + +#include + +// CHECK-RV64-LABEL: @test_vset_v_i8m1_i8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m2_t test_vset_v_i8m1_i8m2(vint8m2_t dest, vint8m1_t val) { + return vset_v_i8m1_i8m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i8m1_i8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m4_t test_vset_v_i8m1_i8m4(vint8m4_t dest, vint8m1_t val) { + return vset_v_i8m1_i8m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i8m2_i8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i8.nxv16i8( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m4_t test_vset_v_i8m2_i8m4(vint8m4_t dest, vint8m2_t val) { + return vset_v_i8m2_i8m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i8m1_i8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.insert.nxv64i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 56) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m8_t test_vset_v_i8m1_i8m8(vint8m8_t dest, vint8m1_t val) { + return vset_v_i8m1_i8m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i8m2_i8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv64i8.nxv16i8( [[DEST:%.*]], [[VAL:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m8_t test_vset_v_i8m2_i8m8(vint8m8_t dest, vint8m2_t val) { + return vset_v_i8m2_i8m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i8m4_i8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv64i8.nxv32i8( [[DEST:%.*]], [[VAL:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint8m8_t test_vset_v_i8m4_i8m8(vint8m8_t dest, vint8m4_t val) { + return vset_v_i8m4_i8m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m1_i16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m2_t test_vset_v_i16m1_i16m2(vint16m2_t dest, vint16m1_t val) { + return vset_v_i16m1_i16m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m1_i16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m4_t test_vset_v_i16m1_i16m4(vint16m4_t dest, vint16m1_t val) { + return vset_v_i16m1_i16m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m2_i16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i16.nxv8i16( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m4_t test_vset_v_i16m2_i16m4(vint16m4_t dest, vint16m2_t val) { + return vset_v_i16m2_i16m4(dest, 1, 
val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m1_i16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 28) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m8_t test_vset_v_i16m1_i16m8(vint16m8_t dest, vint16m1_t val) { + return vset_v_i16m1_i16m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m2_i16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv8i16( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m8_t test_vset_v_i16m2_i16m8(vint16m8_t dest, vint16m2_t val) { + return vset_v_i16m2_i16m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i16m4_i16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv16i16( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint16m8_t test_vset_v_i16m4_i16m8(vint16m8_t dest, vint16m4_t val) { + return vset_v_i16m4_i16m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m1_i32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m2_t test_vset_v_i32m1_i32m2(vint32m2_t dest, vint32m1_t val) { + return vset_v_i32m1_i32m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m1_i32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m4_t test_vset_v_i32m1_i32m4(vint32m4_t dest, vint32m1_t val) { + return vset_v_i32m1_i32m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m2_i32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i32.nxv4i32( [[DEST:%.*]], 
[[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m4_t test_vset_v_i32m2_i32m4(vint32m4_t dest, vint32m2_t val) { + return vset_v_i32m2_i32m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m1_i32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 14) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m8_t test_vset_v_i32m1_i32m8(vint32m8_t dest, vint32m1_t val) { + return vset_v_i32m1_i32m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m2_i32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m8_t test_vset_v_i32m2_i32m8(vint32m8_t dest, vint32m2_t val) { + return vset_v_i32m2_i32m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i32m4_i32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv8i32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint32m8_t test_vset_v_i32m4_i32m8(vint32m8_t dest, vint32m4_t val) { + return vset_v_i32m4_i32m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i64m1_i64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv2i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m2_t test_vset_v_i64m1_i64m2(vint64m2_t dest, vint64m1_t val) { + return vset_v_i64m1_i64m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i64m1_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vset_v_i64m1_i64m4(vint64m4_t dest, vint64m1_t val) { + return vset_v_i64m1_i64m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: 
@test_vset_v_i64m2_i64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i64.nxv2i64( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m4_t test_vset_v_i64m2_i64m4(vint64m4_t dest, vint64m2_t val) { + return vset_v_i64m2_i64m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i64m1_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 7) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vset_v_i64m1_i64m8(vint64m8_t dest, vint64m1_t val) { + return vset_v_i64m1_i64m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i64m2_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv2i64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vset_v_i64m2_i64m8(vint64m8_t dest, vint64m2_t val) { + return vset_v_i64m2_i64m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_i64m4_i64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv4i64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vint64m8_t test_vset_v_i64m4_i64m8(vint64m8_t dest, vint64m4_t val) { + return vset_v_i64m4_i64m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m1_u8m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m2_t test_vset_v_u8m1_u8m2(vuint8m2_t dest, vuint8m1_t val) { + return vset_v_u8m1_u8m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m1_u8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 24) +// CHECK-RV64-NEXT: ret [[TMP0]] +// 
+vuint8m4_t test_vset_v_u8m1_u8m4(vuint8m4_t dest, vuint8m1_t val) { + return vset_v_u8m1_u8m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m2_u8m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i8.nxv16i8( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m4_t test_vset_v_u8m2_u8m4(vuint8m4_t dest, vuint8m2_t val) { + return vset_v_u8m2_u8m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m1_u8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv64i8.nxv8i8( [[DEST:%.*]], [[VAL:%.*]], i64 56) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m8_t test_vset_v_u8m1_u8m8(vuint8m8_t dest, vuint8m1_t val) { + return vset_v_u8m1_u8m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m2_u8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv64i8.nxv16i8( [[DEST:%.*]], [[VAL:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m8_t test_vset_v_u8m2_u8m8(vuint8m8_t dest, vuint8m2_t val) { + return vset_v_u8m2_u8m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u8m4_u8m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv64i8.nxv32i8( [[DEST:%.*]], [[VAL:%.*]], i64 32) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint8m8_t test_vset_v_u8m4_u8m8(vuint8m8_t dest, vuint8m4_t val) { + return vset_v_u8m4_u8m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m1_u16m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m2_t test_vset_v_u16m1_u16m2(vuint16m2_t dest, vuint16m1_t val) { + return vset_v_u16m1_u16m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m1_u16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = 
call @llvm.experimental.vector.insert.nxv16i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 12) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m4_t test_vset_v_u16m1_u16m4(vuint16m4_t dest, vuint16m1_t val) { + return vset_v_u16m1_u16m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m2_u16m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i16.nxv8i16( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m4_t test_vset_v_u16m2_u16m4(vuint16m4_t dest, vuint16m2_t val) { + return vset_v_u16m2_u16m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m1_u16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv4i16( [[DEST:%.*]], [[VAL:%.*]], i64 28) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m8_t test_vset_v_u16m1_u16m8(vuint16m8_t dest, vuint16m1_t val) { + return vset_v_u16m1_u16m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m2_u16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv8i16( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m8_t test_vset_v_u16m2_u16m8(vuint16m8_t dest, vuint16m2_t val) { + return vset_v_u16m2_u16m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u16m4_u16m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv32i16.nxv16i16( [[DEST:%.*]], [[VAL:%.*]], i64 16) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint16m8_t test_vset_v_u16m4_u16m8(vuint16m8_t dest, vuint16m4_t val) { + return vset_v_u16m4_u16m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m1_u32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m2_t test_vset_v_u32m1_u32m2(vuint32m2_t dest, 
vuint32m1_t val) { + return vset_v_u32m1_u32m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m1_u32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m4_t test_vset_v_u32m1_u32m4(vuint32m4_t dest, vuint32m1_t val) { + return vset_v_u32m1_u32m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m2_u32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i32.nxv4i32( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m4_t test_vset_v_u32m2_u32m4(vuint32m4_t dest, vuint32m2_t val) { + return vset_v_u32m2_u32m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m1_u32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv2i32( [[DEST:%.*]], [[VAL:%.*]], i64 14) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m8_t test_vset_v_u32m1_u32m8(vuint32m8_t dest, vuint32m1_t val) { + return vset_v_u32m1_u32m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m2_u32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv4i32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m8_t test_vset_v_u32m2_u32m8(vuint32m8_t dest, vuint32m2_t val) { + return vset_v_u32m2_u32m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u32m4_u32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16i32.nxv8i32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint32m8_t test_vset_v_u32m4_u32m8(vuint32m8_t dest, vuint32m4_t val) { + return vset_v_u32m4_u32m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m1_u64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.insert.nxv2i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m2_t test_vset_v_u64m1_u64m2(vuint64m2_t dest, vuint64m1_t val) { + return vset_v_u64m1_u64m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m1_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vset_v_u64m1_u64m4(vuint64m4_t dest, vuint64m1_t val) { + return vset_v_u64m1_u64m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m2_u64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4i64.nxv2i64( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m4_t test_vset_v_u64m2_u64m4(vuint64m4_t dest, vuint64m2_t val) { + return vset_v_u64m2_u64m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m1_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv1i64( [[DEST:%.*]], [[VAL:%.*]], i64 7) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vset_v_u64m1_u64m8(vuint64m8_t dest, vuint64m1_t val) { + return vset_v_u64m1_u64m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m2_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv2i64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vset_v_u64m2_u64m8(vuint64m8_t dest, vuint64m2_t val) { + return vset_v_u64m2_u64m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_u64m4_u64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8i64.nxv4i64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vuint64m8_t test_vset_v_u64m4_u64m8(vuint64m8_t dest, vuint64m4_t val) 
{ + return vset_v_u64m4_u64m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m1_f32m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4f32.nxv2f32( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m2_t test_vset_v_f32m1_f32m2(vfloat32m2_t dest, vfloat32m1_t val) { + return vset_v_f32m1_f32m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m1_f32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8f32.nxv2f32( [[DEST:%.*]], [[VAL:%.*]], i64 6) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m4_t test_vset_v_f32m1_f32m4(vfloat32m4_t dest, vfloat32m1_t val) { + return vset_v_f32m1_f32m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m2_f32m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8f32.nxv4f32( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m4_t test_vset_v_f32m2_f32m4(vfloat32m4_t dest, vfloat32m2_t val) { + return vset_v_f32m2_f32m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m1_f32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16f32.nxv2f32( [[DEST:%.*]], [[VAL:%.*]], i64 14) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m8_t test_vset_v_f32m1_f32m8(vfloat32m8_t dest, vfloat32m1_t val) { + return vset_v_f32m1_f32m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m2_f32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv16f32.nxv4f32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m8_t test_vset_v_f32m2_f32m8(vfloat32m8_t dest, vfloat32m2_t val) { + return vset_v_f32m2_f32m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f32m4_f32m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call 
@llvm.experimental.vector.insert.nxv16f32.nxv8f32( [[DEST:%.*]], [[VAL:%.*]], i64 8) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat32m8_t test_vset_v_f32m4_f32m8(vfloat32m8_t dest, vfloat32m4_t val) { + return vset_v_f32m4_f32m8(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m1_f64m2( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv2f64.nxv1f64( [[DEST:%.*]], [[VAL:%.*]], i64 1) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m2_t test_vset_v_f64m1_f64m2(vfloat64m2_t dest, vfloat64m1_t val) { + return vset_v_f64m1_f64m2(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m1_f64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4f64.nxv1f64( [[DEST:%.*]], [[VAL:%.*]], i64 3) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m4_t test_vset_v_f64m1_f64m4(vfloat64m4_t dest, vfloat64m1_t val) { + return vset_v_f64m1_f64m4(dest, 3, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m2_f64m4( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv4f64.nxv2f64( [[DEST:%.*]], [[VAL:%.*]], i64 2) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m4_t test_vset_v_f64m2_f64m4(vfloat64m4_t dest, vfloat64m2_t val) { + return vset_v_f64m2_f64m4(dest, 1, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m1_f64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8f64.nxv1f64( [[DEST:%.*]], [[VAL:%.*]], i64 7) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m8_t test_vset_v_f64m1_f64m8(vfloat64m8_t dest, vfloat64m1_t val) { + return vset_v_f64m1_f64m8(dest, 7, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m2_f64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8f64.nxv2f64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m8_t test_vset_v_f64m2_f64m8(vfloat64m8_t dest, 
vfloat64m2_t val) { + return vset_v_f64m2_f64m8(dest, 2, val); +} + +// CHECK-RV64-LABEL: @test_vset_v_f64m4_f64m8( +// CHECK-RV64-NEXT: entry: +// CHECK-RV64-NEXT: [[TMP0:%.*]] = call @llvm.experimental.vector.insert.nxv8f64.nxv4f64( [[DEST:%.*]], [[VAL:%.*]], i64 4) +// CHECK-RV64-NEXT: ret [[TMP0]] +// +vfloat64m8_t test_vset_v_f64m4_f64m8(vfloat64m8_t dest, vfloat64m4_t val) { + return vset_v_f64m4_f64m8(dest, 1, val); +} diff --git a/clang/test/CodeGen/X86/avx512vl-builtins.c b/clang/test/CodeGen/X86/avx512vl-builtins.c index ef27542876a43..4a7226489f573 100644 --- a/clang/test/CodeGen/X86/avx512vl-builtins.c +++ b/clang/test/CodeGen/X86/avx512vl-builtins.c @@ -9426,7 +9426,7 @@ __m128i test_mm_mask_alignr_epi32(__m128i __W, __mmask8 __U, __m128i __A, __m128 // CHECK-LABEL: @test_mm_mask_alignr_epi32 // CHECK: shufflevector <4 x i32> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i32> %{{.*}}, <4 x i32> %{{.*}} - return _mm_mask_alignr_epi32(__W, __U, __A, __B, 1); + return _mm_mask_alignr_epi32(__W, __U, __A, __B, 5); } __m128i test_mm_maskz_alignr_epi32(__mmask8 __U, __m128i __A, __m128i __B) { @@ -9446,7 +9446,7 @@ __m256i test_mm256_mask_alignr_epi32(__m256i __W, __mmask8 __U, __m256i __A, __m // CHECK-LABEL: @test_mm256_mask_alignr_epi32 // CHECK: shufflevector <8 x i32> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> // CHECK: select <8 x i1> %{{.*}}, <8 x i32> %{{.*}}, <8 x i32> %{{.*}} - return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 1); + return _mm256_mask_alignr_epi32(__W, __U, __A, __B, 9); } __m256i test_mm256_maskz_alignr_epi32(__mmask8 __U, __m256i __A, __m256i __B) { @@ -9466,7 +9466,7 @@ __m128i test_mm_mask_alignr_epi64(__m128i __W, __mmask8 __U, __m128i __A, __m128 // CHECK-LABEL: @test_mm_mask_alignr_epi64 // CHECK: shufflevector <2 x i64> %{{.*}}, <2 x i64> %{{.*}}, <2 x i32> // CHECK: select <2 x i1> %{{.*}}, <2 x i64> %{{.*}}, <2 x i64> %{{.*}} - return _mm_mask_alignr_epi64(__W, __U, __A, __B, 1); + return 
_mm_mask_alignr_epi64(__W, __U, __A, __B, 3); } __m128i test_mm_maskz_alignr_epi64(__mmask8 __U, __m128i __A, __m128i __B) { @@ -9486,7 +9486,7 @@ __m256i test_mm256_mask_alignr_epi64(__m256i __W, __mmask8 __U, __m256i __A, __m // CHECK-LABEL: @test_mm256_mask_alignr_epi64 // CHECK: shufflevector <4 x i64> %{{.*}}, <4 x i64> %{{.*}}, <4 x i32> // CHECK: select <4 x i1> %{{.*}}, <4 x i64> %{{.*}}, <4 x i64> %{{.*}} - return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 1); + return _mm256_mask_alignr_epi64(__W, __U, __A, __B, 5); } __m256i test_mm256_maskz_alignr_epi64(__mmask8 __U, __m256i __A, __m256i __B) { diff --git a/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c new file mode 100644 index 0000000000000..2ef8698d3dda3 --- /dev/null +++ b/clang/test/CodeGen/aarch64-sve-vector-bits-codegen.c @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=256 | FileCheck %s --check-prefixes=CHECK,CHECK256 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=512 | FileCheck %s --check-prefixes=CHECK,CHECK512 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=1024 | FileCheck %s --check-prefixes=CHECK,CHECK1024 +// RUN: %clang_cc1 -triple aarch64-none-linux-gnu -target-feature +sve -fallow-half-arguments-and-returns -O2 -S -o - %s -msve-vector-bits=2048 | FileCheck %s --check-prefixes=CHECK,CHECK2048 +// REQUIRES: aarch64-registered-target + +#include + +void func(int *restrict a, int *restrict b) { +// CHECK-LABEL: func +// CHECK256-COUNT-8: st1w +// CHECK512-COUNT-4: st1w +// CHECK1024-COUNT-2: st1w +// CHECK2048-COUNT-1: st1w +#pragma clang loop vectorize(enable) + for (int i = 0; i < 64; ++i) + a[i] += b[i]; +} diff --git 
a/clang/test/CodeGen/aarch64-varargs.c b/clang/test/CodeGen/aarch64-varargs.c index 908fb4ae5d10e..397d61616b916 100644 --- a/clang/test/CodeGen/aarch64-varargs.c +++ b/clang/test/CodeGen/aarch64-varargs.c @@ -11,18 +11,18 @@ va_list the_list; int simple_int(void) { // CHECK-LABEL: define{{.*}} i32 @simple_int return va_arg(the_list, int); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4 // CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32* @@ -30,9 +30,9 @@ int simple_int(void) { // CHECK: br label 
%[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4 // CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32* // CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32* @@ -47,7 +47,7 @@ int simple_int(void) { __int128 aligned_int(void) { // CHECK-LABEL: define{{.*}} i128 @aligned_int return va_arg(the_list, __int128); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] @@ -55,24 +55,24 @@ __int128 aligned_int(void) { // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 
[[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8* // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128* // CHECK: br label %[[VAARG_END]] @@ -89,28 +89,28 @@ struct bigstruct { struct bigstruct 
simple_indirect(void) { // CHECK-LABEL: define{{.*}} void @simple_indirect return va_arg(the_list, struct bigstruct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK-NOT: and i32 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.bigstruct** // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 
0, i32 0) // CHECK-NOT: and i64 // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.bigstruct** // CHECK: br label %[[VAARG_END]] @@ -127,26 +127,26 @@ struct aligned_bigstruct { struct aligned_bigstruct simple_aligned_indirect(void) { // CHECK-LABEL: define{{.*}} void @simple_aligned_indirect return va_arg(the_list, struct aligned_bigstruct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", 
%"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.aligned_bigstruct** // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.aligned_bigstruct** // CHECK: br label %[[VAARG_END]] @@ -158,18 +158,18 @@ struct aligned_bigstruct simple_aligned_indirect(void) { double simple_double(void) { // CHECK-LABEL: define{{.*}} double @simple_double return va_arg(the_list, double); -// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4) +// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 4) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4) +// CHECK: store i32 
[[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 4) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 2) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[VR_OFFS]] // CHECK-BE: [[REG_ADDR_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 8 // CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to double* @@ -177,9 +177,9 @@ double simple_double(void) { // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to double* // CHECK: br label %[[VAARG_END]] @@ -196,18 +196,18 @@ struct hfa { struct hfa simple_hfa(void) { // CHECK-LABEL: define{{.*}} %struct.hfa @simple_hfa return va_arg(the_list, struct hfa); -// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds 
(%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4) +// CHECK: [[VR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 4) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[VR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[VR_OFFS]], 32 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 4) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 4) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 2) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 2) // CHECK: [[FIRST_REG:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[VR_OFFS]] // CHECK-LE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 0 // CHECK-BE: [[EL_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[FIRST_REG]], i64 12 @@ -225,9 +225,9 @@ struct hfa simple_hfa(void) { // CHECK: br label %[[VAARG_END:[a-z_.0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr 
inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.hfa* // CHECK: br label %[[VAARG_END]] @@ -243,18 +243,18 @@ typedef int underaligned_int __attribute__((packed,aligned(2))); underaligned_int underaligned_int_test() { // CHECK-LABEL: define{{.*}} i32 @underaligned_int_test() return va_arg(the_list, underaligned_int); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: 
[[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4 // CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32* @@ -262,9 +262,9 @@ underaligned_int underaligned_int_test() { // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4 // CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32* // CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32* @@ -280,18 +280,18 @@ typedef int overaligned_int __attribute__((aligned(32))); overaligned_int overaligned_int_test() { // CHECK-LABEL: define{{.*}} i32 @overaligned_int_test() return va_arg(the_list, overaligned_int); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // 
CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK-BE: [[REG_ADDR_ALIGNED:%[0-9]+]] = getelementptr inbounds i8, i8* [[REG_ADDR]], i64 4 // CHECK-BE: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR_ALIGNED]] to i32* @@ -299,9 +299,9 @@ overaligned_int overaligned_int_test() { // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK-BE: [[STACK_ALIGNED:%[a-z_0-9]*]] = getelementptr inbounds i8, i8* [[STACK]], i64 4 // 
CHECK-BE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK_ALIGNED]] to i32* // CHECK-LE: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i32* @@ -317,26 +317,26 @@ typedef long long underaligned_long_long __attribute__((packed,aligned(2))); underaligned_long_long underaligned_long_long_test() { // CHECK-LABEL: define{{.*}} i64 @underaligned_long_long_test() return va_arg(the_list, underaligned_long_long); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i64* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// 
CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i64* // CHECK: br label %[[VAARG_END]] @@ -350,26 +350,26 @@ typedef long long overaligned_long_long __attribute__((aligned(32))); overaligned_long_long overaligned_long_long_test() { // CHECK-LABEL: define{{.*}} i64 @overaligned_long_long_test() return va_arg(the_list, overaligned_long_long); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label 
%[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i64* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to i64* // CHECK: br label %[[VAARG_END]] @@ -383,7 +383,7 @@ typedef __int128 underaligned_int128 __attribute__((packed,aligned(2))); underaligned_int128 underaligned_int128_test() { // CHECK-LABEL: define{{.*}} i128 @underaligned_int128_test() return va_arg(the_list, underaligned_int128); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label 
%[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] @@ -391,24 +391,24 @@ underaligned_int128 underaligned_int128_test() { // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8* // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr 
inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128* // CHECK: br label %[[VAARG_END]] @@ -422,7 +422,7 @@ typedef __int128 overaligned_int128 __attribute__((aligned(32))); overaligned_int128 overaligned_int128_test() { // CHECK-LABEL: define{{.*}} i128 @overaligned_int128_test() return va_arg(the_list, overaligned_int128); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] @@ -430,24 +430,24 @@ overaligned_int128 overaligned_int128_test() { // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, 
%struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to i128* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8* // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to i128* // CHECK: br label %[[VAARG_END]] @@ -476,26 +476,26 @@ underaligned_int_struct underaligned_int_struct_test() { // CHECK-LE-LABEL: define{{.*}} i32 @underaligned_int_struct_test() // CHECK-BE-LABEL: define{{.*}} i64 @underaligned_int_struct_test() return va_arg(the_list, underaligned_int_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr 
inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int_struct* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", 
%"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int_struct* // CHECK: br label %[[VAARG_END]] @@ -509,26 +509,26 @@ typedef struct __attribute__((aligned(16))) { overaligned_int_struct overaligned_int_struct_test() { // CHECK-LABEL: define{{.*}} i128 @overaligned_int_struct_test() return va_arg(the_list, overaligned_int_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int_struct* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: 
[[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int_struct* // CHECK: br label %[[VAARG_END]] @@ -542,26 +542,26 @@ typedef struct __attribute__((packed,aligned(2))) { underaligned_long_long_struct underaligned_long_long_struct_test() { // CHECK-LABEL: define{{.*}} i64 @underaligned_long_long_struct_test() return va_arg(the_list, underaligned_long_long_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 
[[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_long_long_struct* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_long_long_struct* // CHECK: br label %[[VAARG_END]] @@ -575,26 +575,26 @@ typedef struct __attribute__((aligned(16))) { overaligned_long_long_struct overaligned_long_long_struct_test() { // CHECK-LABEL: define{{.*}} i128 @overaligned_long_long_struct_test() return va_arg(the_list, overaligned_long_long_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, 
i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_long_long_struct* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: 
[[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_long_long_struct* // CHECK: br label %[[VAARG_END]] @@ -608,26 +608,26 @@ typedef struct __attribute__((packed,aligned(2))) { underaligned_int128_struct underaligned_int128_struct_test() { // CHECK-LABEL: define{{.*}} [2 x i64] @underaligned_int128_struct_test() return va_arg(the_list, underaligned_int128_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int128_struct* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// 
CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int128_struct* // CHECK: br label %[[VAARG_END]] @@ -642,26 +642,26 @@ typedef struct __attribute__((aligned(32))) { overaligned_int128_struct overaligned_int128_struct_test() { // CHECK-LABEL: define{{.*}} void @overaligned_int128_struct_test(%struct.overaligned_int128_struct* noalias sret(%struct.overaligned_int128_struct) align 32 %agg.result) return va_arg(the_list, overaligned_int128_struct); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: 
[[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int128_struct** // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int128_struct** // CHECK: br label %[[VAARG_END]] @@ -679,26 +679,26 @@ underaligned_int_struct_member underaligned_int_struct_member_test() { // CHECK-LE-LABEL: define{{.*}} i32 @underaligned_int_struct_member_test() // CHECK-BE-LABEL: define{{.*}} i64 @underaligned_int_struct_member_test() return va_arg(the_list, underaligned_int_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] 
= load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int_struct_member* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds 
(%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int_struct_member* // CHECK: br label %[[VAARG_END]] @@ -712,7 +712,7 @@ typedef struct { overaligned_int_struct_member overaligned_int_struct_member_test() { // CHECK-LABEL: define{{.*}} i128 @overaligned_int_struct_member_test() return va_arg(the_list, overaligned_int_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] @@ -720,24 +720,24 @@ overaligned_int_struct_member overaligned_int_struct_member_test() { // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: 
[[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int_struct_member* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8* // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to %struct.overaligned_int_struct_member* // CHECK: br label %[[VAARG_END]] @@ -751,26 +751,26 @@ typedef struct { underaligned_long_long_struct_member underaligned_long_long_struct_member_test() { // CHECK-LABEL: define{{.*}} i64 @underaligned_long_long_struct_member_test() return va_arg(the_list, underaligned_long_long_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 
[[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_long_long_struct_member* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to 
%struct.underaligned_long_long_struct_member* // CHECK: br label %[[VAARG_END]] @@ -784,7 +784,7 @@ typedef struct { overaligned_long_long_struct_member overaligned_long_long_struct_member_test() { // CHECK-LABEL: define{{.*}} i128 @overaligned_long_long_struct_member_test() return va_arg(the_list, overaligned_long_long_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] @@ -792,24 +792,24 @@ overaligned_long_long_struct_member overaligned_long_long_struct_member_test() { // CHECK: [[ALIGN_REGOFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 15 // CHECK: [[ALIGNED_REGOFFS:%[a-z_0-9]+]] = and i32 [[ALIGN_REGOFFS]], -16 // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[ALIGNED_REGOFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[ALIGNED_REGOFFS]] // CHECK: 
[[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_long_long_struct_member* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[STACKINT:%[a-z_0-9]+]] = ptrtoint i8* [[STACK]] to i64 // CHECK: [[ALIGN_STACK:%[a-z_0-9]+]] = add i64 [[STACKINT]], 15 // CHECK: [[ALIGNED_STACK_INT:%[a-z_0-9]+]] = and i64 [[ALIGN_STACK]], -16 // CHECK: [[ALIGNED_STACK_PTR:%[a-z_0-9]+]] = inttoptr i64 [[ALIGNED_STACK_INT]] to i8* // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[ALIGNED_STACK_PTR]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[ALIGNED_STACK_PTR]] to %struct.overaligned_long_long_struct_member* // CHECK: br label %[[VAARG_END]] @@ -823,26 +823,26 @@ typedef struct { underaligned_int128_struct_member underaligned_int128_struct_member_test() { // CHECK-LABEL: define{{.*}} [2 x i64] @underaligned_int128_struct_member_test() return va_arg(the_list, underaligned_int128_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label 
%[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 16 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.underaligned_int128_struct_member* // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 16 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.underaligned_int128_struct_member* // CHECK: br label %[[VAARG_END]] @@ -857,26 +857,26 @@ typedef struct { 
overaligned_int128_struct_member overaligned_int128_struct_member_test() { // CHECK-LABEL: define{{.*}} void @overaligned_int128_struct_member_test(%struct.overaligned_int128_struct_member* noalias sret(%struct.overaligned_int128_struct_member) align 32 %agg.result) return va_arg(the_list, overaligned_int128_struct_member); -// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: [[GR_OFFS:%[a-z_0-9]+]] = load i32, i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[EARLY_ONSTACK:%[a-z_0-9]+]] = icmp sge i32 [[GR_OFFS]], 0 // CHECK: br i1 [[EARLY_ONSTACK]], label %[[VAARG_ON_STACK:[a-z_.0-9]+]], label %[[VAARG_MAYBE_REG:[a-z_.0-9]+]] // CHECK: [[VAARG_MAYBE_REG]] // CHECK: [[NEW_REG_OFFS:%[a-z_0-9]+]] = add i32 [[GR_OFFS]], 8 -// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 3) +// CHECK: store i32 [[NEW_REG_OFFS]], i32* getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 3) // CHECK: [[INREG:%[a-z_0-9]+]] = icmp sle i32 [[NEW_REG_OFFS]], 0 // CHECK: br i1 [[INREG]], label %[[VAARG_IN_REG:[a-z_.0-9]+]], label %[[VAARG_ON_STACK]] // CHECK: [[VAARG_IN_REG]] -// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 1) +// CHECK: [[REG_TOP:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 1) // CHECK: [[REG_ADDR:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[REG_TOP]], i32 [[GR_OFFS]] // CHECK: [[FROMREG_ADDR:%[a-z_0-9]+]] = bitcast i8* [[REG_ADDR]] to %struct.overaligned_int128_struct_member** // CHECK: br label %[[VAARG_END:[a-z._0-9]+]] // CHECK: [[VAARG_ON_STACK]] -// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr 
inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: [[STACK:%[a-z_0-9]+]] = load i8*, i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[NEW_STACK:%[a-z_0-9]+]] = getelementptr inbounds i8, i8* [[STACK]], i64 8 -// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%struct.__va_list, %struct.__va_list* @the_list, i32 0, i32 0) +// CHECK: store i8* [[NEW_STACK]], i8** getelementptr inbounds (%"struct.std::__va_list", %"struct.std::__va_list"* @the_list, i32 0, i32 0) // CHECK: [[FROMSTACK_ADDR:%[a-z_0-9]+]] = bitcast i8* [[STACK]] to %struct.overaligned_int128_struct_member** // CHECK: br label %[[VAARG_END]] @@ -889,8 +889,8 @@ void check_start(int n, ...) { va_list the_list; va_start(the_list, n); -// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %struct.__va_list -// CHECK: [[VOIDP_THE_LIST:%[a-z_0-9]+]] = bitcast %struct.__va_list* [[THE_LIST]] to i8* +// CHECK: [[THE_LIST:%[a-z_0-9]+]] = alloca %"struct.std::__va_list" +// CHECK: [[VOIDP_THE_LIST:%[a-z_0-9]+]] = bitcast %"struct.std::__va_list"* [[THE_LIST]] to i8* // CHECK: call void @llvm.va_start(i8* [[VOIDP_THE_LIST]]) } diff --git a/clang/test/CodeGen/altivec.c b/clang/test/CodeGen/altivec.c index 86b570f15d080..af239b54711c1 100644 --- a/clang/test/CodeGen/altivec.c +++ b/clang/test/CodeGen/altivec.c @@ -4,12 +4,12 @@ // RUN: %clang_cc1 -target-feature +altivec -triple powerpc64le-unknown-unknown -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-LE // RUN: %clang_cc1 -target-feature +altivec -mabi=vec-extabi -target-cpu pwr8 -triple powerpc-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE // RUN: %clang_cc1 -target-feature +altivec -mabi=vec-extabi -target-cpu pwr8 -triple powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE -// RUN: not %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple 
powerpc-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR -// RUN: not %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple powerpc64-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR -// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 -target powerpc-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE -// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 -target powerpc64-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE -// RUN: not %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 -triple powerpc-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR -// RUN: not %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 -triple powerpc64-unknown-aix -emit-llvm %s 2>&1 | FileCheck %s --check-prefix=AIX-ERROR +// RUN: %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple powerpc-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang_cc1 -target-feature +altivec -mabi=vec-default -target-cpu pwr8 -triple powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 --target=powerpc-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-extabi -mcpu=pwr8 --target=powerpc64-unknown-aix %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 --target=powerpc-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE +// RUN: %clang -S -emit-llvm -maltivec -mabi=vec-default -mcpu=pwr8 --target=powerpc64-unknown-aix -emit-llvm %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-BE // Check initialization vector int test0 = (vector int)(1); // CHECK: @test0 ={{.*}} global <4 x i32> @@ -52,5 +52,3 @@ void test3() { 
vector float vf; vf++; // CHECK: fadd <4 x float> {{.*}} } - -// AIX-ERROR: error: The default Altivec ABI on AIX is not yet supported, use '-mabi=vec-extabi' for the extended Altivec ABI diff --git a/clang/test/CodeGen/arm64-be-hfa-vararg.c b/clang/test/CodeGen/arm64-be-hfa-vararg.c index c22572459bab5..2309de3dbc714 100644 --- a/clang/test/CodeGen/arm64-be-hfa-vararg.c +++ b/clang/test/CodeGen/arm64-be-hfa-vararg.c @@ -4,12 +4,12 @@ // A single member HFA must be aligned just like a non-HFA register argument. double callee(int a, ...) { -// CHECK: [[REGPP:%.*]] = getelementptr inbounds %struct.__va_list, %struct.__va_list* [[VA:%.*]], i32 0, i32 2 +// CHECK: [[REGPP:%.*]] = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* [[VA:%.*]], i32 0, i32 2 // CHECK: [[REGP:%.*]] = load i8*, i8** [[REGPP]], align 8 // CHECK: [[OFFSET0:%.*]] = getelementptr inbounds i8, i8* [[REGP]], i32 {{.*}} // CHECK: [[OFFSET1:%.*]] = getelementptr inbounds i8, i8* [[OFFSET0]], i64 8 -// CHECK: [[MEMPP:%.*]] = getelementptr inbounds %struct.__va_list, %struct.__va_list* [[VA:%.*]], i32 0, i32 0 +// CHECK: [[MEMPP:%.*]] = getelementptr inbounds %"struct.std::__va_list", %"struct.std::__va_list"* [[VA:%.*]], i32 0, i32 0 // CHECK: [[MEMP:%.*]] = load i8*, i8** [[MEMPP]], align 8 // CHECK: [[NEXTP:%.*]] = getelementptr inbounds i8, i8* [[MEMP]], i64 8 // CHECK: store i8* [[NEXTP]], i8** [[MEMPP]], align 8 diff --git a/clang/test/CodeGen/builtins-ppc-vsx.c b/clang/test/CodeGen/builtins-ppc-vsx.c index abd08d463e634..b5ddd03722ad0 100644 --- a/clang/test/CodeGen/builtins-ppc-vsx.c +++ b/clang/test/CodeGen/builtins-ppc-vsx.c @@ -2319,21 +2319,15 @@ void test_p8overloads_backwards_compat() { res_vsll = vec_add(vsll, vsll); // CHECK: add <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw - // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> + // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i32> // CHECK: add <4 x i32> - 
// CHECK-LE: add <4 x i32> - // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw - // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: add <4 x i32> + // CHECK-LE: add <2 x i64> res_vull = vec_add(vull, vull); // CHECK: add <4 x i32> // CHECK: call <4 x i32> @llvm.ppc.altivec.vaddcuw - // CHECK: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> + // CHECK: shufflevector <16 x i8> {{%.*}}, <16 x i8> {{%.*}}, <16 x i32> // CHECK: add <4 x i32> - // CHECK-LE: add <4 x i32> - // CHECK-LE: call <4 x i32> @llvm.ppc.altivec.vaddcuw - // CHECK-LE: shufflevector <4 x i32> {{%.*}}, <4 x i32> {{%.*}}, <4 x i32> - // CHECK-LE: add <4 x i32> + // CHECK-LE: add <2 x i64> dummy(); // CHECK: call void @dummy() // CHECK-LE: call void @dummy() diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cas-error.c b/clang/test/CodeGen/builtins-ppc-xlcompat-cas-error.c new file mode 100644 index 0000000000000..c35c54d6b1858 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-cas-error.c @@ -0,0 +1,19 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -target-cpu pwr8 \ +// RUN: -verify %s + +void test_builtin_ppc_compare_and_swap() { + volatile int a = 0; + long b = 0, c = 0; + + __compare_and_swap(&a, &b, c); // expected-warning {{incompatible pointer types passing 'long *' to parameter of type 'int *'}} + +} + +void test_builtin_ppc_compare_and_swaplp() { + volatile long a = 0; + int b = 0, c = 0; + + __compare_and_swaplp(&a, &b, c);// expected-warning {{incompatible pointer types passing 'int *' to parameter of type 'long *'}} + +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c b/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c new file mode 100644 index 0000000000000..ea4b349d9a523 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-cas.c @@ -0,0 +1,47 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: 
powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s + + +// CHECK-LABEL: @test_builtin_ppc_compare_and_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// CHECK-NEXT: store i32 [[C:%.*]], i32* [[C_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[C_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i32* [[A_ADDR]], i32 [[TMP1]], i32 [[TMP0]] monotonic monotonic, align 4 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i32, i1 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = extractvalue { i32, i1 } [[TMP2]], 1 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_compare_and_swap(int a, int b, int c) { + __compare_and_swap(&a, &b, c); +} + + +// CHECK-LABEL: @test_builtin_ppc_compare_and_swaplp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 +// CHECK-NEXT: store i64 [[C:%.*]], i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[C_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = cmpxchg weak volatile i64* [[A_ADDR]], i64 [[TMP1]], i64 [[TMP0]] monotonic monotonic, align 8 +// CHECK-NEXT: [[TMP3:%.*]] = extractvalue { i64, i1 } [[TMP2]], 0 +// CHECK-NEXT: [[TMP4:%.*]] = 
extractvalue { i64, i1 } [[TMP2]], 1 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_compare_and_swaplp(long a, long b, long c) { + __compare_and_swaplp(&a, &b, c); +} + diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-fetch-error.c b/clang/test/CodeGen/builtins-ppc-xlcompat-fetch-error.c new file mode 100644 index 0000000000000..a5124e3c10e93 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-fetch-error.c @@ -0,0 +1,17 @@ +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-aix -target-cpu pwr8 \ +// RUN: -verify %s + +void test_builtin_ppc_fetch_and_add2() { + volatile int a = 0; + unsigned int b = 0; + + __fetch_and_add(&a, b); // expected-warning {{passing 'volatile int *' to parameter of type 'volatile unsigned int *' converts between pointers to integer types with different sign}} +} + +void test_builtin_ppc_fetch_and_addlp() { + volatile long a = 0; + unsigned long b = 0; + + __fetch_and_addlp(&a, b); // expected-warning {{passing 'volatile long *' to parameter of type 'volatile unsigned long *' converts between pointers to integer types with different sign}} +} diff --git a/clang/test/CodeGen/builtins-ppc-xlcompat-fetch.c b/clang/test/CodeGen/builtins-ppc-xlcompat-fetch.c new file mode 100644 index 0000000000000..7d0b674b39c25 --- /dev/null +++ b/clang/test/CodeGen/builtins-ppc-xlcompat-fetch.c @@ -0,0 +1,120 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py +// REQUIRES: powerpc-registered-target +// RUN: %clang_cc1 -triple powerpc64-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s +// RUN: %clang_cc1 -triple powerpc64le-unknown-unknown \ +// RUN: -emit-llvm %s -o - -target-cpu pwr8 | FileCheck %s + +// CHECK-LABEL: @test_builtin_ppc_fetch_and_add( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// 
CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_add(unsigned int a, unsigned int b) { + __fetch_and_add(&a, b); +} + +// CHECK-LABEL: @test_builtin_ppc_fetch_and_addlp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw add i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_addlp(unsigned long a, unsigned long b) { + __fetch_and_addlp(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_and( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_and(unsigned int a, unsigned int b) { + __fetch_and_and(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_andlp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 +// CHECK-NEXT: 
store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw and i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_andlp(unsigned long a, unsigned long b) { + __fetch_and_andlp(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_or( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 [[B:%.*]], i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_or(unsigned int a, unsigned int b) { + __fetch_and_or(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_orlp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw or i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_orlp(unsigned long a, unsigned long b) { + __fetch_and_orlp(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_swap( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32 [[A:%.*]], i32* [[A_ADDR]], align 4 +// CHECK-NEXT: store i32 
[[B:%.*]], i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP0:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP1:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i32* [[A_ADDR]], i32 [[TMP1]] monotonic, align 4 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_swap(unsigned int a, unsigned int b) { + __fetch_and_swap(&a, b); +} +// CHECK-LABEL: @test_builtin_ppc_fetch_and_swaplp( +// CHECK-NEXT: entry: +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: store i64 [[A:%.*]], i64* [[A_ADDR]], align 8 +// CHECK-NEXT: store i64 [[B:%.*]], i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i64, i64* [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = atomicrmw xchg i64* [[A_ADDR]], i64 [[TMP1]] monotonic, align 8 +// CHECK-NEXT: ret void +// +void test_builtin_ppc_fetch_and_swaplp(unsigned long a, unsigned long b) { + __fetch_and_swaplp(&a, b); +} diff --git a/clang/test/CodeGen/cfi-icall-va-list.c b/clang/test/CodeGen/cfi-icall-va-list.c new file mode 100644 index 0000000000000..ff24e99899238 --- /dev/null +++ b/clang/test/CodeGen/cfi-icall-va-list.c @@ -0,0 +1,7 @@ +// RUN: %clang_cc1 -triple aarch64-unknown-linux -fsanitize=cfi-icall -fsanitize-trap=cfi-icall -emit-llvm -o - %s | FileCheck %s + +// CHECK: define dso_local void @f({{.*}} !type [[TYPE:![0-9]+]] !type [[TYPE_GENERALIZED:![0-9]+]] +void f(__builtin_va_list l) {} + +// CHECK-DAG: [[TYPE]] = !{i64 0, !"_ZTSFvSt9__va_listE"} +// CHECK-DAG: [[TYPE_GENERALIZED]] = !{i64 0, !"_ZTSFvSt9__va_listE.generalized"} diff --git a/clang/test/CodeGen/no_profile.c b/clang/test/CodeGen/no_profile.c index 50ca71f4fa0ed..9c9524338fe59 100644 --- a/clang/test/CodeGen/no_profile.c +++ b/clang/test/CodeGen/no_profile.c @@ -8,7 +8,7 @@ // RUN: -emit-llvm -o - %s | FileCheck %s int g(int); -void __attribute__((no_profile)) 
no_instr() { +void __attribute__((no_profile_instrument_function)) no_instr() { // CHECK: define {{.*}}void @no_instr() [[ATTR:#[0-9]+]] } diff --git a/clang/test/CodeGen/sanitize-coverage-old-pm.c b/clang/test/CodeGen/sanitize-coverage-old-pm.c index 610dd651587f9..ff37eda464a85 100644 --- a/clang/test/CodeGen/sanitize-coverage-old-pm.c +++ b/clang/test/CodeGen/sanitize-coverage-old-pm.c @@ -6,7 +6,7 @@ // RUN: %clang %s -target x86_64-unknown-linux-gnu -emit-llvm -S -fsanitize=undefined -fsanitize-coverage=trace-pc,trace-cmp -o - -flegacy-pass-manager | FileCheck %s --check-prefixes=CHECK,UBSAN // // Host armv7 is currently unsupported: https://bugs.llvm.org/show_bug.cgi?id=46117 -// XFAIL: armv7, thumbv7 +// UNSUPPORTED: armv7, thumbv7, armv8l // The same issue also occurs on a riscv32 host. // XFAIL: riscv32 diff --git a/clang/test/CodeGenCUDA/managed-var.cu b/clang/test/CodeGenCUDA/managed-var.cu index 99bbad924ea54..05a7a69387690 100644 --- a/clang/test/CodeGenCUDA/managed-var.cu +++ b/clang/test/CodeGenCUDA/managed-var.cu @@ -146,7 +146,7 @@ float load3() { // HOST: %3 = getelementptr inbounds [100 x %struct.vec], [100 x %struct.vec]* %2, i64 0, i64 1, i32 1 // HOST: %4 = ptrtoint float* %3 to i64 // HOST: %5 = sub i64 %4, %1 -// HOST: %6 = sdiv i64 %5, 4 +// HOST: %6 = sdiv exact i64 %5, 4 // HOST: %7 = sitofp i64 %6 to float // HOST: ret float %7 float addr_taken2() { diff --git a/clang/test/CodeGenCXX/debug-info-blocks.cpp b/clang/test/CodeGenCXX/debug-info-blocks.cpp index 7eea3ce09649f..e22594cb5d6ac 100644 --- a/clang/test/CodeGenCXX/debug-info-blocks.cpp +++ b/clang/test/CodeGenCXX/debug-info-blocks.cpp @@ -12,9 +12,7 @@ void test() { ^{ (void)a; }; } -// CHECK: !DISubprogram(name: "__Block_byref_object_copy_", -// CHECK-SAME: line: 11, +// CHECK: !DISubprogram(linkageName: "__Block_byref_object_copy_", // CHECK-SAME: DISPFlagLocalToUnit | DISPFlagDefinition -// CHECK: !DISubprogram(name: "__Block_byref_object_dispose_", -// CHECK-SAME: line: 11, +// 
CHECK: !DISubprogram(linkageName: "__Block_byref_object_dispose_", // CHECK-SAME: DISPFlagLocalToUnit | DISPFlagDefinition diff --git a/clang/test/CodeGenCXX/new.cpp b/clang/test/CodeGenCXX/new.cpp index 2181534a6beb4..3142dba4bf683 100644 --- a/clang/test/CodeGenCXX/new.cpp +++ b/clang/test/CodeGenCXX/new.cpp @@ -176,6 +176,7 @@ void t13(int n) { struct Alloc{ int x; void* operator new[](size_t size); + __attribute__((returns_nonnull)) void *operator new[](size_t size, const std::nothrow_t &) throw(); void operator delete[](void* p); ~Alloc(); }; @@ -186,6 +187,10 @@ void f() { // CHECK: call void @_ZN5AllocD1Ev( // CHECK: call void @_ZN5AllocdaEPv(i8* delete[] new Alloc[10][20]; + // CHECK: [[P:%.*]] = call nonnull i8* @_ZN5AllocnaEmRKSt9nothrow_t(i64 808, {{.*}}) [[ATTR_NOUNWIND:#[^ ]*]] + // CHECK-NOT: icmp eq i8* [[P]], null + // CHECK: store i64 200 + delete[] new (nothrow) Alloc[10][20]; // CHECK: call noalias nonnull i8* @_Znwm // CHECK: call void @_ZdlPv(i8* delete new bool; @@ -328,7 +333,7 @@ namespace N3664 { // CHECK: call void @_ZdaPv({{.*}}) [[ATTR_BUILTIN_DELETE]] delete[] p; // expected-warning {{'delete[]' applied to a pointer that was allocated with 'new'; did you mean 'delete'?}} - // CHECK: call noalias i8* @_ZnamRKSt9nothrow_t(i64 3, {{.*}}) [[ATTR_BUILTIN_NOTHROW_NEW:#[^ ]*]] + // CHECK: call noalias i8* @_ZnamRKSt9nothrow_t(i64 3, {{.*}}) [[ATTR_NOBUILTIN_NOUNWIND_ALLOCSIZE:#[^ ]*]] (void) new (nothrow) S[3]; // CHECK: call i8* @_Znwm15MyPlacementType(i64 4){{$}} diff --git a/clang/test/CodeGenObjC/block-byref-debuginfo.m b/clang/test/CodeGenObjC/block-byref-debuginfo.m index a145b28bb49f8..15ff53e0635ff 100644 --- a/clang/test/CodeGenObjC/block-byref-debuginfo.m +++ b/clang/test/CodeGenObjC/block-byref-debuginfo.m @@ -1,5 +1,8 @@ // RUN: %clang_cc1 -fblocks -fobjc-arc -fobjc-runtime-has-weak -debug-info-kind=limited -triple x86_64-apple-darwin -emit-llvm %s -o - | FileCheck %s +// CHECK: define internal void 
@__Block_byref_object_copy_({{.*}} !dbg ![[BYREF_COPY_SP:.*]] { +// CHECK: getelementptr inbounds {{.*}}, !dbg ![[BYREF_COPY_LOC:.*]] + // CHECK: !DILocalVariable(name: "foo", {{.*}}type: ![[FOOTY:[0-9]+]]) // CHECK: ![[FOOTY]] = {{.*}}!DICompositeType({{.*}}, name: "Foo" @@ -24,12 +27,23 @@ // CHECK: !DILocalVariable(name: "foo", {{.*}}type: ![[FOOTY]]) +// CHECK: ![[BYREF_COPY_SP]] = distinct !DISubprogram(linkageName: "__Block_byref_object_copy_", +// CHECK: ![[BYREF_COPY_LOC]] = !DILocation(line: 0, scope: ![[BYREF_COPY_SP]]) struct Foo { unsigned char *data; }; + +struct Foo2 { + id f0; +}; + +void (^bptr)(void); + int func() { __attribute__((__blocks__(byref))) struct Foo foo; ^{ foo.data = 0; }(); + __block struct Foo2 foo2; + bptr = ^{ foo2.f0 =0; }; return 0; } diff --git a/clang/test/CodeGenObjC/debug-info-block-helper.m b/clang/test/CodeGenObjC/debug-info-block-helper.m index 1d37ea44ec6f7..914962897fd4a 100644 --- a/clang/test/CodeGenObjC/debug-info-block-helper.m +++ b/clang/test/CodeGenObjC/debug-info-block-helper.m @@ -2,7 +2,7 @@ // RUN: %clang_cc1 -emit-llvm -fblocks -debug-info-kind=limited -triple x86_64-apple-darwin10 -fobjc-runtime=macosx-fragile-10.5 %s -o - | FileCheck %s extern void foo(void(^)(void)); -// CHECK: !DISubprogram(name: "__destroy_helper_block_8_32o40r48r" +// CHECK: !DISubprogram(linkageName: "__destroy_helper_block_8_32o40r48r" @interface NSObject { struct objc_object *isa; diff --git a/clang/test/CodeGenObjC/debug-info-blocks.m b/clang/test/CodeGenObjC/debug-info-blocks.m index 257045b05c32b..64392e2d8bc5a 100644 --- a/clang/test/CodeGenObjC/debug-info-blocks.m +++ b/clang/test/CodeGenObjC/debug-info-blocks.m @@ -25,9 +25,9 @@ // CHECK: ret {{.*}}, !dbg ![[DESTROY_LINE]] // CHECK-DAG: [[DBG_LINE]] = !DILocation(line: 0, scope: ![[COPY_SP:[0-9]+]]) -// CHECK-DAG: [[COPY_SP]] = distinct !DISubprogram(name: "__copy_helper_block_ +// CHECK-DAG: [[COPY_SP]] = distinct !DISubprogram(linkageName: "__copy_helper_block_ // CHECK-DAG: 
[[DESTROY_LINE]] = !DILocation(line: 0, scope: ![[DESTROY_SP:[0-9]+]]) -// CHECK-DAG: [[DESTROY_SP]] = distinct !DISubprogram(name: "__destroy_helper_block_ +// CHECK-DAG: [[DESTROY_SP]] = distinct !DISubprogram(linkageName: "__destroy_helper_block_ typedef unsigned int NSUInteger; @protocol NSObject diff --git a/clang/test/CodeGenOpenCL/amdgpu-features.cl b/clang/test/CodeGenOpenCL/amdgpu-features.cl index b4b86d75c9ae7..a532da344df1a 100644 --- a/clang/test/CodeGenOpenCL/amdgpu-features.cl +++ b/clang/test/CodeGenOpenCL/amdgpu-features.cl @@ -34,6 +34,7 @@ // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1032 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1032 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1033 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1033 %s // RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1034 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1034 %s +// RUN: %clang_cc1 -triple amdgcn -target-cpu gfx1035 -S -emit-llvm -o - %s | FileCheck --check-prefix=GFX1035 %s // GFX600: "target-features"="+s-memtime-inst" // GFX601: "target-features"="+s-memtime-inst" @@ -66,5 +67,6 @@ // GFX1032: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1033: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" // GFX1034: "target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" +// GFX1035: 
"target-features"="+16-bit-insts,+ci-insts,+dl-insts,+dot1-insts,+dot2-insts,+dot5-insts,+dot6-insts,+dot7-insts,+dpp,+flat-address-space,+gfx10-3-insts,+gfx10-insts,+gfx8-insts,+gfx9-insts,+s-memrealtime,+s-memtime-inst" kernel void test() {} diff --git a/clang/test/CodeGenOpenCL/debug-info-programming-language.cl b/clang/test/CodeGenOpenCL/debug-info-programming-language.cl new file mode 100644 index 0000000000000..fc57a41e5a299 --- /dev/null +++ b/clang/test/CodeGenOpenCL/debug-info-programming-language.cl @@ -0,0 +1,17 @@ +// RUN: %clang_cc1 -dwarf-version=5 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -x cl -O0 -disable-llvm-passes -debug-info-kind=limited \ +// RUN: | FileCheck --check-prefix=CHECK-OPENCL %s +// RUN: %clang_cc1 -dwarf-version=3 -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -x cl -O0 -disable-llvm-passes -debug-info-kind=limited \ +// RUN: | FileCheck --check-prefix=CHECK-OPENCL %s +// RUN: %clang_cc1 -dwarf-version=3 -gstrict-dwarf -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -x cl -O0 -disable-llvm-passes -debug-info-kind=limited \ +// RUN: | FileCheck --check-prefix=CHECK-C99 %s +// RUN: %clang_cc1 -dwarf-version=5 -gstrict-dwarf -emit-llvm -triple %itanium_abi_triple %s -o - \ +// RUN: -x cl -O0 -disable-llvm-passes -debug-info-kind=limited \ +// RUN: | FileCheck --check-prefix=CHECK-OPENCL %s + +kernel void empty() {} + +// CHECK-OPENCL: distinct !DICompileUnit(language: DW_LANG_OpenCL, +// CHECK-C99: distinct !DICompileUnit(language: DW_LANG_C99, diff --git a/clang/test/Driver/aarch64-target-as-march.s b/clang/test/Driver/aarch64-target-as-march.s new file mode 100644 index 0000000000000..a9301ade43351 --- /dev/null +++ b/clang/test/Driver/aarch64-target-as-march.s @@ -0,0 +1,46 @@ +/// These tests make sure that options passed to the assembler +/// via -Wa or -Xassembler are applied correctly to assembler inputs. 
+ +/// Does not apply to non assembly files +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.1-a \ +// RUN: %S/Inputs/wildcard1.c 2>&1 | FileCheck --check-prefix=TARGET-FEATURE-1 %s +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Xassembler -march=armv8.1-a \ +// RUN: %S/Inputs/wildcard1.c 2>&1 | FileCheck --check-prefix=TARGET-FEATURE-1 %s + +// TARGET-FEATURE-1-NOT: "-target-feature" "+v8.1a" + +/// Does apply to assembler input +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.2-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=TARGET-FEATURE-2 %s +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Xassembler -march=armv8.2-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=TARGET-FEATURE-2 %s + +// TARGET-FEATURE-2: "-target-feature" "+v8.2a" + +/// No unused argument warnings when there are multiple values +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.1-a -Wa,-march=armv8.2-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=UNUSED-WARNING %s + +// UNUSED-WARNING-NOT: warning: argument unused during compilation + +/// Last march to assembler wins +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.2-a -Wa,-march=armv8.1-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=MULTIPLE-VALUES %s +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.2-a,-march=armv8.1-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=MULTIPLE-VALUES %s +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Xassembler -march=armv8.2-a -Xassembler \ +// RUN: -march=armv8.1-a %s 2>&1 | FileCheck --check-prefix=MULTIPLE-VALUES %s + +// MULTIPLE-VALUES: "-target-feature" "+v8.1a +// MULTIPLE-VALUES-NOT: "-target-feature" "+v8.2a + +/// march to compiler and assembler, we choose the one suited to the input file type +// RUN: %clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.3-a -march=armv8.4-a %s 2>&1 | \ +// RUN: FileCheck --check-prefix=TARGET-FEATURE-3 %s +// RUN: 
%clang --target=aarch64-linux-gnueabi -### -c -Wa,-march=armv8.3-a -march=armv8.4-a \ +// RUN: %S/Inputs/wildcard1.c 2>&1 | FileCheck --check-prefix=TARGET-FEATURE-4 %s + +// TARGET-FEATURE-3: "-target-feature" "+v8.3a" +// TARGET-FEATURE-3-NOT: "-target-feature" "+v8.4a" +// TARGET-FEATURE-4: "-target-feature" "+v8.4a" +// TARGET-FEATURE-4-NOT: "-target-feature" "+v8.3a" diff --git a/clang/test/Driver/aix-ld.c b/clang/test/Driver/aix-ld.c index c66b235cb1e2c..c5f1061f03466 100644 --- a/clang/test/Driver/aix-ld.c +++ b/clang/test/Driver/aix-ld.c @@ -6,6 +6,7 @@ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32 %s // CHECK-LD32-NOT: warning: // CHECK-LD32: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" @@ -31,6 +32,7 @@ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64 %s // CHECK-LD64-NOT: warning: // CHECK-LD64: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" @@ -57,6 +59,7 @@ // RUN: -pthread \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-PTHREAD %s // CHECK-LD32-PTHREAD-NOT: warning: // CHECK-LD32-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" @@ -84,6 +87,7 @@ // RUN: -pthreads \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-PTHREAD %s // CHECK-LD64-PTHREAD-NOT: warning: // CHECK-LD64-PTHREAD: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" @@ -111,6 +115,7 @@ // RUN: -p \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot 
%S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-PROF %s // CHECK-LD32-PROF-NOT: warning: // CHECK-LD32-PROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" @@ -137,6 +142,7 @@ // RUN: -pg \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-GPROF %s // CHECK-LD64-GPROF-NOT: warning: // CHECK-LD64-GPROF: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" @@ -163,6 +169,7 @@ // RUN: -static \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-STATIC %s // CHECK-LD32-STATIC-NOT: warning: // CHECK-LD32-STATIC: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" @@ -189,6 +196,7 @@ // RUN: -L%S/Inputs/aix_ppc_tree/powerpc-ibm-aix7.1.0.0 \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-LIBP %s // CHECK-LD32-LIBP-NOT: warning: // CHECK-LD32-LIBP: {{.*}}clang{{(.exe)?}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" @@ -272,6 +280,7 @@ // RUN: -Wl,-bnocdtors \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-ARG-ORDER %s // CHECK-LD32-ARG-ORDER: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-ARG-ORDER: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -300,6 +309,7 @@ // RUN: -Wl,-bnocdtors \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-CXX-ARG-ORDER %s // CHECK-LD32-CXX-ARG-ORDER: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-CXX-ARG-ORDER: "-resource-dir" 
"[[RESOURCE_DIR:[^"]+]]" @@ -327,6 +337,7 @@ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-CXX-ARG-LCXX %s // CHECK-LD32-CXX-ARG-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-CXX-ARG-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -350,6 +361,7 @@ // RUN: -resource-dir=%S/Inputs/resource_dir \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-CXX-ARG-LCXX %s // CHECK-LD64-CXX-ARG-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-CXX-ARG-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -374,6 +386,7 @@ // RUN: -nodefaultlibs \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NODEFLIB-LCXX %s // CHECK-LD32-NODEFLIB-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-NODEFLIB-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -398,6 +411,7 @@ // RUN: -nodefaultlibs \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-NODEFLIB-LCXX %s // CHECK-LD64-NODEFLIB-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-NODEFLIB-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -422,6 +436,7 @@ // RUN: -nostdlib \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NOSTDLIB-LCXX %s // CHECK-LD32-NOSTDLIB-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-NOSTDLIB-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -446,6 
+461,7 @@ // RUN: -nostdlib \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-NOSTDLIB-LCXX %s // CHECK-LD64-NOSTDLIB-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-NOSTDLIB-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -471,6 +487,7 @@ // RUN: -nostdlib++ \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NOSTDLIBXX-LCXX %s // CHECK-LD32-NOSTDLIBXX-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-NOSTDLIBXX-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -495,6 +512,7 @@ // RUN: -nostdlib++ \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-NOSTDLIBXX-LCXX %s // CHECK-LD64-NOSTDLIBXX-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-NOSTDLIBXX-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -519,6 +537,7 @@ // RUN: -nostartfiles \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NOSTARTFILES-LCXX %s // CHECK-LD32-NOSTARTFILES-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-NOSTARTFILES-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -543,6 +562,7 @@ // RUN: -nostartfiles \ // RUN: -target powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-NOSTARTFILES-LCXX %s // CHECK-LD64-NOSTARTFILES-LCXX: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-NOSTARTFILES-LCXX: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -582,6 +602,7 @@ // RUN: -shared \ // RUN: -target 
powerpc-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD32-SHARED %s // CHECK-LD32-SHARED: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc-ibm-aix7.1.0.0" // CHECK-LD32-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" @@ -608,6 +629,7 @@ // RUN: -shared \ // RUN: -target powerpc64-ibm-aix7.1.0.0 \ // RUN: --sysroot %S/Inputs/aix_ppc_tree \ +// RUN: -unwindlib=libunwind \ // RUN: | FileCheck --check-prefix=CHECK-LD64-SHARED %s // CHECK-LD64-SHARED: {{.*}}clang{{.*}}" "-cc1" "-triple" "powerpc64-ibm-aix7.1.0.0" // CHECK-LD64-SHARED: "-resource-dir" "[[RESOURCE_DIR:[^"]+]]" diff --git a/clang/test/Driver/aix-vec-extabi.c b/clang/test/Driver/aix-vec-extabi.c deleted file mode 100644 index ccc3b0732e4ba..0000000000000 --- a/clang/test/Driver/aix-vec-extabi.c +++ /dev/null @@ -1,10 +0,0 @@ -// RUN: %clang -### -target powerpc-unknown-aix -S -maltivec -mabi=vec-extabi %s 2>&1 | \ -// RUN: FileCheck %s - -// CHECK: "-cc1" -// CHECK-SAME: "-mabi=vec-extabi" - -// RUN: %clang -### -target powerpc-unknown-aix -S -maltivec -mabi=vec-default %s 2>&1 | \ -// RUN: FileCheck %s --check-prefix=ERROR - -// ERROR: The default Altivec ABI on AIX is not yet supported, use '-mabi=vec-extabi' for the extended Altivec ABI diff --git a/clang/test/Driver/aix-vec_extabi.c b/clang/test/Driver/aix-vec_extabi.c new file mode 100644 index 0000000000000..93de4a25e3e76 --- /dev/null +++ b/clang/test/Driver/aix-vec_extabi.c @@ -0,0 +1,16 @@ +// RUN: %clang -### -target powerpc-unknown-aix -S %s 2>&1 | \ +// RUN: FileCheck %s --implicit-check-not=vec-extabi +// RUN: %clang -### -target powerpc-unknown-aix -S -maltivec %s 2>&1 | \ +// RUN: FileCheck %s --implicit-check-not=vec-extabi +// RUN: %clang -### -target powerpc-unknown-aix -S -maltivec -mabi=vec-default %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=DFLTABI --implicit-check-not=vec-extabi +// RUN: %clang -### -target powerpc-unknown-aix -S -mabi=vec-extabi 
%s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=EXTABI +// RUN: %clang -### -target powerpc-unknown-aix -S -maltivec -mabi=vec-extabi %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=EXTABI +// +// EXTABI: "-cc1" +// EXTABI-SAME: "-mabi=vec-extabi" + +// DFLTABI: "-cc1" +// DFLTABI-SAME: "-mabi=vec-default" diff --git a/clang/test/Driver/amdgpu-macros.cl b/clang/test/Driver/amdgpu-macros.cl index bea4eab13fe5e..965fdda67945f 100644 --- a/clang/test/Driver/amdgpu-macros.cl +++ b/clang/test/Driver/amdgpu-macros.cl @@ -116,6 +116,7 @@ // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1032 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1032 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1033 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1033 // RUN: %clang -E -dM -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1034 +// RUN: %clang -E -dM -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefixes=ARCH-GCN,FAST_FMAF %s -DWAVEFRONT_SIZE=32 -DCPU=gfx1035 // ARCH-GCN-DAG: #define FP_FAST_FMA 1 diff --git a/clang/test/Driver/amdgpu-mcpu.cl b/clang/test/Driver/amdgpu-mcpu.cl index 8b472206db7c2..432533b10d9dc 100644 --- a/clang/test/Driver/amdgpu-mcpu.cl +++ b/clang/test/Driver/amdgpu-mcpu.cl @@ -101,6 +101,7 @@ // RUN: %clang -### -target amdgcn -mcpu=gfx1032 %s 2>&1 | FileCheck --check-prefix=GFX1032 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1033 %s 2>&1 | FileCheck --check-prefix=GFX1033 %s // RUN: %clang -### -target amdgcn -mcpu=gfx1034 %s 2>&1 | FileCheck --check-prefix=GFX1034 %s +// RUN: %clang -### -target amdgcn -mcpu=gfx1035 %s 2>&1 | FileCheck --check-prefix=GFX1035 %s // GCNDEFAULT-NOT: -target-cpu // GFX600: "-target-cpu" "gfx600" @@ -134,3 +135,4 @@ // GFX1032: "-target-cpu" "gfx1032" // GFX1033: "-target-cpu" "gfx1033" // GFX1034: "-target-cpu" "gfx1034" +// GFX1035: "-target-cpu" "gfx1035" diff --git
a/clang/test/Driver/amdgpu-toolchain-opencl.cl b/clang/test/Driver/amdgpu-toolchain-opencl.cl index 3994387f3eadb..152eda1c46927 100644 --- a/clang/test/Driver/amdgpu-toolchain-opencl.cl +++ b/clang/test/Driver/amdgpu-toolchain-opencl.cl @@ -7,6 +7,12 @@ // RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Og %s 2>&1 | FileCheck -check-prefix=CHECK_Og %s // RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -Ofast %s 2>&1 | FileCheck -check-prefix=CHECK_Ofast %s // RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHECK_O_DEFAULT %s + +// Check default include file is not included for preprocessor output. + +// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji %s 2>&1 | FileCheck -check-prefix=CHK-INC %s +// RUN: %clang -### -target amdgcn-amd-amdhsa-opencl -x cl -c -emit-llvm -mcpu=fiji -save-temps %s 2>&1 | FileCheck -check-prefix=CHK-INC %s + // CHECK_O0: clang{{.*}} "-O0" // CHECK_O1: clang{{.*}} "-O1" // CHECK_O2: clang{{.*}} "-O2" @@ -17,3 +23,5 @@ // CHECK_Ofast: {{.*}}clang{{.*}} "-Ofast" // CHECK_O_DEFAULT: clang{{.*}} "-O3" +// CHK-INC: clang{{.*}} "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cl" +// CHK-INC-NOT: clang{{.*}} "-cc1" {{.*}}"-finclude-default-header" "-fdeclare-opencl-builtins" {{.*}}"-x" "cpp-output" diff --git a/clang/test/Driver/cl-include.c b/clang/test/Driver/cl-include.c index a69265deed02c..ca9e7db1e6f07 100644 --- a/clang/test/Driver/cl-include.c +++ b/clang/test/Driver/cl-include.c @@ -7,19 +7,37 @@ // RUN: %clang_cl -nobuiltininc -### -- %s 2>&1 | FileCheck %s --check-prefix=NOBUILTIN // NOBUILTIN-NOT: "-internal-isystem" "{{.*lib.*clang.*include}}" -// RUN: env INCLUDE=/my/system/inc %clang_cl -### -- %s 2>&1 | FileCheck %s --check-prefix=STDINC +// RUN: env INCLUDE=/my/system/inc env EXTERNAL_INCLUDE=/my/system/inc2 %clang_cl -### -- %s 
2>&1 | FileCheck %s --check-prefix=STDINC // STDINC: "-internal-isystem" "/my/system/inc" +// STDINC: "-internal-isystem" "/my/system/inc2" // -nostdinc suppresses all of %INCLUDE%, clang resource dirs, and -imsvc dirs. -// RUN: env INCLUDE=/my/system/inc %clang_cl -nostdinc -imsvc /my/other/inc -### -- %s 2>&1 | FileCheck %s --check-prefix=NOSTDINC +// RUN: env INCLUDE=/my/system/inc env EXTERNAL_INCLUDE=/my/system/inc2 %clang_cl -nostdinc -imsvc /my/other/inc -### -- %s 2>&1 | FileCheck %s --check-prefix=NOSTDINC // NOSTDINC: argument unused{{.*}}-imsvc // NOSTDINC-NOT: "-internal-isystem" "/my/system/inc" +// NOSTDINC-NOT: "-internal-isystem" "/my/system/inc2" // NOSTDINC-NOT: "-internal-isystem" "{{.*lib.*clang.*include}}" // NOSTDINC-NOT: "-internal-isystem" "/my/other/inc" -// /X suppresses %INCLUDE% but not clang resource dirs or -imsvc dirs. -// RUN: env INCLUDE=/my/system/inc %clang_cl /X -imsvc /my/other/inc -### -- %s 2>&1 | FileCheck %s --check-prefix=SLASHX +// /X suppresses %INCLUDE% and %EXTERNAL_INCLUDE% but not clang resource dirs, -imsvc dirs, or /external: flags. +// RUN: env INCLUDE=/my/system/inc env EXTERNAL_INCLUDE=/my/system/inc2 env FOO=/my/other/inc2 %clang_cl /X -imsvc /my/other/inc /external:env:FOO -### -- %s 2>&1 | FileCheck %s --check-prefix=SLASHX // SLASHX-NOT: "argument unused{{.*}}-imsvc" // SLASHX-NOT: "-internal-isystem" "/my/system/inc" +// SLASHX-NOT: "-internal-isystem" "/my/system/inc2" // SLASHX: "-internal-isystem" "{{.*lib.*clang.*include}}" // SLASHX: "-internal-isystem" "/my/other/inc" +// SLASHX: "-internal-isystem" "/my/other/inc2" + +// /winsysroot suppresses %EXTERNAL_INCLUDE% but not -imsvc dirs or /external: flags. 
+// RUN: env EXTERNAL_INCLUDE=/my/system/inc env FOO=/my/other/inc2 %clang_cl /winsysroot /foo -imsvc /my/other/inc /external:env:FOO -### -- %s 2>&1 | FileCheck %s --check-prefix=SYSROOT +// SYSROOT-NOT: "argument unused{{.*}}-imsvc" +// SYSROOT-NOT: "argument unused{{.*}}/external:" +// SYSROOT-NOT: "/my/system/inc" +// SYSROOT: "-internal-isystem" "/my/other/inc" +// SYSROOT: "-internal-isystem" "/my/other/inc2" +// SYSROOT: "-internal-isystem" "/foo{{.*}}" + +// RUN: env "FOO=/dir1;/dir2" env "BAR=/dir3" %clang_cl /external:env:FOO /external:env:BAR -### -- %s 2>&1 | FileCheck %s --check-prefix=EXTERNAL_ENV +// EXTERNAL_ENV: "-internal-isystem" "/dir1" +// EXTERNAL_ENV: "-internal-isystem" "/dir2" +// EXTERNAL_ENV: "-internal-isystem" "/dir3" diff --git a/clang/test/Driver/cl-options.c b/clang/test/Driver/cl-options.c index 258ac451fee08..a483cc260612b 100644 --- a/clang/test/Driver/cl-options.c +++ b/clang/test/Driver/cl-options.c @@ -38,6 +38,10 @@ // EP: "-P" // EP: "-o" "-" +// RUN: %clang_cl /external:Ipath -### -- %s 2>&1 | FileCheck -check-prefix=EXTERNAL_I %s +// RUN: %clang_cl /external:I path -### -- %s 2>&1 | FileCheck -check-prefix=EXTERNAL_I %s +// EXTERNAL_I: "-isystem" "path" + // RUN: %clang_cl /fp:fast /fp:except -### -- %s 2>&1 | FileCheck -check-prefix=fpexcept %s // fpexcept-NOT: -menable-unsafe-fp-math @@ -434,8 +438,6 @@ // RUN: /experimental:preprocessor \ // RUN: /exportHeader /headerName:foo \ // RUN: /external:anglebrackets \ -// RUN: /external:Ipath \ -// RUN: /external:I path \ // RUN: /external:env:var \ // RUN: /external:W0 \ // RUN: /external:W1 \ diff --git a/clang/test/Driver/freebsd.cpp b/clang/test/Driver/freebsd.cpp index baf52f77dd07f..fde888902e12c 100644 --- a/clang/test/Driver/freebsd.cpp +++ b/clang/test/Driver/freebsd.cpp @@ -5,9 +5,12 @@ // CHECK-TEN: "-lc++" "-lm" // CHECK-NINE: "-lstdc++" "-lm" +// RUN: %clangxx %s -### -pg -o %t.o -target amd64-unknown-freebsd40.0 -stdlib=platform 2>&1 \ +// RUN: | FileCheck
--check-prefix=CHECK-PG-FOURTEEN %s // RUN: %clangxx %s -### -pg -o %t.o -target amd64-unknown-freebsd10.0 -stdlib=platform 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PG-TEN %s // RUN: %clangxx %s -### -pg -o %t.o -target amd64-unknown-freebsd9.2 -stdlib=platform 2>&1 \ // RUN: | FileCheck --check-prefix=CHECK-PG-NINE %s +// CHECK-PG-FOURTEEN: "-lc++" "-lm" // CHECK-PG-TEN: "-lc++_p" "-lm_p" // CHECK-PG-NINE: "-lstdc++_p" "-lm_p" diff --git a/clang/test/Driver/sycl-intelfpga-aoco-win.cpp b/clang/test/Driver/sycl-intelfpga-aoco-win.cpp index 71150213b3a56..8e1470aa17a24 100755 --- a/clang/test/Driver/sycl-intelfpga-aoco-win.cpp +++ b/clang/test/Driver/sycl-intelfpga-aoco-win.cpp @@ -2,12 +2,12 @@ // RUN: echo "Dummy AOCO image" > %t.aoco // RUN: echo "void foo() {}" > %t.c // RUN: echo "void foo2() {}" > %t2.c -// RUN: %clang -c -o %t.o %t.c -// RUN: %clang_cl -fsycl -c -o %t2.o %t2.c +// RUN: %clang -target x86_64-pc-windows-msvc -c -o %t.o %t.c +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -c -o %t2.o %t2.c // RUN: clang-offload-wrapper -o %t-aoco.bc -host=x86_64-pc-windows-msvc -kind=sycl -target=fpga_aoco-intel-unknown-sycldevice %t.aoco // RUN: llc -filetype=obj -o %t-aoco.o %t-aoco.bc // RUN: llvm-ar crv %t_aoco.a %t.o %t2.o %t-aoco.o -// RUN: %clang_cl -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a %s -ccc-print-phases 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a %s -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO-PHASES-WIN %s // RUN: %clangxx -fsycl-use-footer -target x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a %s -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCO-PHASES-WIN %s @@ -42,9 +42,9 @@ // CHK-FPGA-AOCO-PHASES-WIN: 28: offload, "host-sycl (x86_64-pc-windows-msvc)" {12}, 
"device-sycl (spir64_fpga-unknown-unknown-sycldevice)" {27}, image /// aoco test, checking tools -// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a -Xshardware -### %s 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -fintelfpga -foffload-static-lib=%t_aoco.a -Xshardware -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FPGA-AOCO %s -// RUN: %clang_cl -fsycl -fno-sycl-device-lib=all -fintelfpga %t_aoco.a -Xshardware -### %s 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all -fintelfpga %t_aoco.a -Xshardware -### %s 2>&1 \ // RUN: | FileCheck -check-prefix=CHK-FPGA-AOCO %s // CHK-FPGA-AOCO: clang-offload-bundler{{.*}} "-type=a" "-targets=sycl-spir64_fpga-unknown-unknown-sycldevice" "-inputs=[[INPUTLIB:.+\.a]]" "-outputs=[[OUTLIB:.+\.a]]" "-unbundle" // CHK-FPGA-AOCO: llvm-link{{.*}} "[[OUTLIB]]" "-o" "[[LINKEDBC:.+\.bc]]" diff --git a/clang/test/Driver/sycl-offload-intelfpga.cpp b/clang/test/Driver/sycl-offload-intelfpga.cpp index 232639b93bd56..6c1fe42230e63 100644 --- a/clang/test/Driver/sycl-offload-intelfpga.cpp +++ b/clang/test/Driver/sycl-offload-intelfpga.cpp @@ -174,7 +174,7 @@ // RUN: llvm-ar crv %t_aocx.a %t.o %t-aocx.o // RUN: %clangxx -target x86_64-unknown-linux-gnu -Xshardware -fsycl -fintelfpga %t_aocx.a -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCX-PHASES %s -// RUN: %clang_cl -Xshardware -fsycl -fintelfpga %t_aocx.a -ccc-print-phases 2>&1 \ +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -Xshardware -fsycl -fintelfpga %t_aocx.a -ccc-print-phases 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-AOCX-PHASES %s // CHK-FPGA-AOCX-PHASES: 0: input, "{{.*}}", fpga_aocx, (host-sycl) // CHK-FPGA-AOCX-PHASES: 1: linker, {0}, image, (host-sycl) @@ -306,7 +306,7 @@ /// -fintelfpga dependency file generation test to object // RUN: %clangxx -### -fsycl -fintelfpga -target x86_64-unknown-linux-gnu %t-1.cpp 
%t-2.cpp -c 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-DEP-FILES2,CHK-FPGA-DEP-FILES2-LIN %s -// RUN: %clang_cl -### -fsycl -fintelfpga %t-1.cpp %t-2.cpp -c 2>&1 \ +// RUN: %clang_cl -### -fsycl -fintelfpga --target=x86_64-pc-windows-msvc %t-1.cpp %t-2.cpp -c 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-DEP-FILES2,CHK-FPGA-DEP-FILES2-WIN %s // CHK-FPGA-DEP-FILES2: clang{{.*}} "-dependency-file" "[[INPUT1:.+\.d]]" // CHK-FPGA-DEP-FILES2-LIN: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_fpga-unknown-unknown-sycldevice,host-x86_64-unknown-linux-gnu,sycl-fpga_dep" {{.*}} "-inputs={{.*}}.bc,{{.*}}.o,[[INPUT1]]" @@ -321,7 +321,7 @@ // RUN: | FileCheck -check-prefixes=CHK-FPGA-DEP-FILES3,CHK-FPGA-DEP-FILES3-LIN %s // RUN: %clangxx -### -target x86_64-unknown-linux-gnu -fsycl -fintelfpga %t-1.cpp -c -MMD -MF"dummy.d" 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-DEP-FILES3,CHK-FPGA-DEP-FILES3-LIN %s -// RUN: %clang_cl -### -fsycl -fintelfpga %t-1.cpp -c -Fodummy.obj 2>&1 \ +// RUN: %clang_cl -### --target=x86_64-pc-windows-msvc -fsycl -fintelfpga %t-1.cpp -c -Fodummy.obj 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-FPGA-DEP-FILES3,CHK-FPGA-DEP-FILES3-WIN %s // CHK-FPGA-DEP-FILES3: clang{{.*}} "-dependency-file" "[[OUTPUT:.+\.d]]" // CHK-FPGA-DEP-FILES3-LIN: clang-offload-bundler{{.*}} "-type=o" "-targets=sycl-spir64_fpga-unknown-unknown-sycldevice,host-x86_64-unknown-linux-gnu,sycl-fpga_dep" {{.*}} "-inputs={{.*}}.bc,{{.*}}.o,[[OUTPUT]]" diff --git a/clang/test/Driver/sycl-offload-win.c b/clang/test/Driver/sycl-offload-win.c index 24618080a7bfb..b2a7e35d43b5b 100644 --- a/clang/test/Driver/sycl-offload-win.c +++ b/clang/test/Driver/sycl-offload-win.c @@ -8,7 +8,7 @@ /// Test behaviors of -foffload-static-lib= with single object. // Build the offload library that is used for the tests. 
// RUN: echo "void foo() {}" > %t.c -// RUN: %clang_cl -fsycl -c -Fo%t-orig.obj %t.c +// RUN: %clang_cl --target=x86_64-pc-windows-msvc -fsycl -c -Fo%t-orig.obj %t.c // RUN: llvm-ar cr %t-orig.lib %t-orig.obj // RUN: %clang --target=x86_64-pc-windows-msvc -fsycl -fno-sycl-device-lib=all %t-orig.lib %t-orig.obj -### 2>&1 \ // RUN: | FileCheck %s -check-prefix=FOFFLOAD_STATIC_LIB diff --git a/clang/test/Driver/sycl-offload.c b/clang/test/Driver/sycl-offload.c index 3a1368c3a24fb..d0d3710bde121 100644 --- a/clang/test/Driver/sycl-offload.c +++ b/clang/test/Driver/sycl-offload.c @@ -96,16 +96,24 @@ /// Check no error for -fsycl-targets with good triple // RUN: %clang -### -fsycl-targets=spir-unknown-unknown-sycldevice -fsycl %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-SYCL-FPGA-TRIPLE %s +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s +// RUN: %clang -### -fsycl-targets=spir64 -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s // RUN: %clang -### -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice -fsycl %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-SYCL-FPGA-TRIPLE %s +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s +// RUN: %clang -### -fsycl-targets=spir64_fpga -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s // RUN: %clang -### -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice -fsycl %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-SYCL-FPGA-TRIPLE %s +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s +// RUN: %clang -### -fsycl-targets=spir64_x86_64 -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s // RUN: %clang -### -fsycl-targets=spir64_gen-unknown-unknown-sycldevice -fsycl %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-SYCL-FPGA-TRIPLE %s +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s +// RUN: %clang -### -fsycl-targets=spir64_gen -fsycl %s 2>&1 \ +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s // RUN: %clang_cl -### 
-fsycl-targets=spir-unknown-unknown-sycldevice -fsycl %s 2>&1 \ -// RUN: | FileCheck -check-prefix=CHK-SYCL-FPGA-TRIPLE %s -// CHK-SYCL-FPGA-TRIPLE-NOT: error: SYCL target is invalid +// RUN: | FileCheck -check-prefix=CHK-SYCL-TARGET %s +// CHK-SYCL-TARGET-NOT: error: SYCL target is invalid /// Check error for -fsycl-[add|link]-targets with bad triple // RUN: %clang -### -fsycl-add-targets=spir64_bad-unknown-unknown-sycldevice:dummy.spv -fsycl %s 2>&1 \ @@ -163,15 +171,15 @@ /// The same phase graph will be used with -fsycl-use-bitcode // RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl-use-footer -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl-use-footer -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl-use-footer -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s // RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl-use-footer -fsycl -fno-sycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl-use-footer -fsycl -fno-sycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s // RUN: %clang -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl-use-footer -fsycl -fsycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-DEFAULT-MODE %s -// RUN: %clang_cl 
-ccc-print-phases -fsycl-use-footer -fsycl -fsycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ +// RUN: %clang_cl -ccc-print-phases --target=x86_64-pc-windows-msvc -fsycl-use-footer -fsycl -fsycl-use-bitcode -fno-sycl-device-lib=all %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-PHASES,CHK-PHASES-CL-MODE %s // CHK-PHASES: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) // CHK-PHASES: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) @@ -302,6 +310,8 @@ // RUN: touch %t.o // RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t.o 2>&1 \ // RUN: | FileCheck -DINPUT=%t.o -check-prefix=CHK-UBACTIONS %s +// RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -o %t.out -lsomelib -fsycl-targets=spir64 %t.o 2>&1 \ +// RUN: | FileCheck -DINPUT=%t.o -check-prefix=CHK-UBACTIONS %s // RUN: mkdir -p %t_dir // RUN: touch %t_dir/dummy // RUN: %clang -### -ccc-print-phases -target x86_64-unknown-linux-gnu -fsycl -fno-sycl-device-lib=all -o %t.out -lsomelib -fsycl-targets=spir64-unknown-unknown-sycldevice %t_dir/dummy 2>&1 \ @@ -625,10 +635,16 @@ /// Ahead of Time compilation for fpga, gen, cpu // RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-FPGA +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_fpga %s 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-FPGA // RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_gen-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-GEN +// RUN: 
%clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_gen %s 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-GEN // RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_x86_64-unknown-unknown-sycldevice %s 2>&1 \ // RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-CPU +// RUN: %clang -target x86_64-unknown-linux-gnu -ccc-print-phases -fsycl-use-footer -fsycl -fno-sycl-device-lib=all -fsycl-targets=spir64_x86_64 %s 2>&1 \ +// RUN: | FileCheck %s -check-prefixes=CHK-PHASES-AOT,CHK-PHASES-CPU // CHK-PHASES-AOT: 0: input, "[[INPUT:.+\.c]]", c++, (host-sycl) // CHK-PHASES-AOT: 1: preprocessor, {0}, c++-cpp-output, (host-sycl) // CHK-PHASES-AOT: 2: append-footer, {1}, c++, (host-sycl) @@ -693,7 +709,8 @@ // CHK-TOOLS-AOT: file-table-tform{{.*}} "-extract=Code" "-drop_titles" "-o" "[[OUTPUT2_1:.+\.txt]]" "[[OUTPUT2_T]]" // CHK-TOOLS-CPU: llvm-spirv{{.*}} "-o" "[[OUTPUT3_T:.+\.txt]]" "-spirv-max-version=1.3" "-spirv-debug-info-version=ocl-100" "-spirv-allow-extra-diexpressions" "-spirv-allow-unknown-intrinsics=llvm.genx." {{.*}} "[[OUTPUT2_1]]" // CHK-TOOLS-GEN: llvm-spirv{{.*}} "-o" "[[OUTPUT3_T:.+\.txt]]" "-spirv-max-version=1.3" "-spirv-debug-info-version=ocl-100" "-spirv-allow-extra-diexpressions" "-spirv-allow-unknown-intrinsics=llvm.genx." {{.*}} "[[OUTPUT2_1]]" -// CHK-TOOLS-FPGA: llvm-spirv{{.*}} "-o" "[[OUTPUT3_T:.+\.txt]]" "-spirv-max-version=1.3" "-spirv-debug-info-version=ocl-100" "-spirv-allow-extra-diexpressions" "-spirv-allow-unknown-intrinsics=llvm.genx." {{.*}} "[[OUTPUT2_1]]" +// CHK-TOOLS-FPGA-HW: llvm-spirv{{.*}} "-o" "[[OUTPUT3_T:.+\.txt]]" "-spirv-max-version=1.3" "-spirv-debug-info-version=legacy" "-spirv-allow-extra-diexpressions" "-spirv-allow-unknown-intrinsics=llvm.genx." 
{{.*}} "[[OUTPUT2_1]]" +// CHK-TOOLS-FPGA-EMU: llvm-spirv{{.*}} "-o" "[[OUTPUT3_T:.+\.txt]]" "-spirv-max-version=1.3" "-spirv-debug-info-version=ocl-100" "-spirv-allow-extra-diexpressions" "-spirv-allow-unknown-intrinsics=llvm.genx." {{.*}} "[[OUTPUT2_1]]" // CHK-TOOLS-FPGA-HW: aoc{{.*}} "-o" "[[OUTPUT4_T:.+\.aocx]]" "[[OUTPUT3_T]]" // CHK-TOOLS-FPGA-EMU: opencl-aot{{.*}} "-spv=[[OUTPUT3_T]]" "-ir=[[OUTPUT4_T:.+\.aocx]]" // CHK-TOOLS-GEN: ocloc{{.*}} "-output" "[[OUTPUT4_T:.+\.out]]" {{.*}} "[[OUTPUT3_T]]" @@ -804,6 +821,9 @@ // RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64-unknown-unknown-sycldevice,spir64_gen-unknown-unknown-sycldevice \ // RUN: -Xsycl-target-backend=spir64_gen-unknown-unknown-sycldevice "-device skl -cl-opt-disable" -Xsycl-target-linker=spir64-unknown-unknown-sycldevice "-cl-denorms-are-zero" %s 2>&1 \ // RUN: | FileCheck -check-prefixes=CHK-TOOLS-MULT-OPTS,CHK-TOOLS-MULT-OPTS-NEG %s +// RUN: %clang -### -target x86_64-unknown-linux-gnu -fsycl -fsycl-targets=spir64,spir64_gen \ +// RUN: -Xsycl-target-backend=spir64_gen "-device skl -cl-opt-disable" -Xsycl-target-linker=spir64 "-cl-denorms-are-zero" %s 2>&1 \ +// RUN: | FileCheck -check-prefixes=CHK-TOOLS-MULT-OPTS,CHK-TOOLS-MULT-OPTS-NEG %s // CHK-TOOLS-MULT-OPTS: clang-offload-wrapper{{.*}} "-link-opts=-cl-denorms-are-zero"{{.*}} "-target=spir64" // CHK-TOOLS-MULT-OPTS: ocloc{{.*}} "-device" "skl"{{.*}} "-cl-opt-disable" // CHK-TOOLS-MULT-OPTS-NEG-NOT: clang-offload-wrapper{{.*}} "-compile-opts=-device skl -cl-opt-disable"{{.*}} "-target=spir64" diff --git a/clang/test/Driver/unsupported-option.c b/clang/test/Driver/unsupported-option.c index d0611977a99e1..975440352259d 100644 --- a/clang/test/Driver/unsupported-option.c +++ b/clang/test/Driver/unsupported-option.c @@ -1,7 +1,23 @@ // RUN: not %clang %s --hedonism -### 2>&1 | \ // RUN: FileCheck %s +// CHECK: error: unsupported option '--hedonism' + // RUN: not %clang %s --hell -### 2>&1 | \ // RUN: FileCheck 
%s --check-prefix=DID-YOU-MEAN - -// CHECK: error: unsupported option '--hedonism' // DID-YOU-MEAN: error: unsupported option '--hell'; did you mean '--help'? + +// RUN: not %clang -fprofile-instr-generate --target=powerpc-ibm-aix %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=INVALID-AIX-PROFILE +// INVALID-AIX-PROFILE: error: unsupported option '-fprofile-instr-generate' for target + +// RUN: not %clang -fprofile-sample-use=code.prof --target=powerpc-ibm-aix %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AIX-PROFILE-SAMPLE +// AIX-PROFILE-SAMPLE: error: unsupported option '-fprofile-sample-use=' for target + +// RUN: not %clang -fprofile-generate --target=powerpc-ibm-aix %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AIX-PROFILE-LTO +// AIX-PROFILE-LTO: error: invalid argument '-fprofile-generate' only allowed with '-flto' + +// RUN: not %clang -fprofile-generate -flto=thin --target=powerpc64-ibm-aix %s 2>&1 | \ +// RUN: FileCheck %s --check-prefix=AIX-PROFILE-THINLTO +// AIX-PROFILE-THINLTO: error: invalid argument '-fprofile-generate' only allowed with '-flto' diff --git a/clang/test/FixIt/fixit.cpp b/clang/test/FixIt/fixit.cpp index bfff20e76c145..8a3500c9f210e 100644 --- a/clang/test/FixIt/fixit.cpp +++ b/clang/test/FixIt/fixit.cpp @@ -1,12 +1,12 @@ -// RUN: %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -verify -fcxx-exceptions -x c++ -std=c++98 %s +// RUN: %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -verify -fcxx-exceptions -x c++ -std=c++98 -Wno-c++14-extensions %s // RUN: cp %s %t-98 -// RUN: not %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -fcxx-exceptions -fixit -x c++ -std=c++98 %t-98 -// RUN: %clang_cc1 -fsyntax-only -pedantic -Wall -Wno-unused-but-set-variable -Werror -Wno-comment -fcxx-exceptions -x c++ -std=c++98 %t-98 -// RUN: not %clang_cc1 -fsyntax-only -pedantic -fdiagnostics-parseable-fixits -x c++ -std=c++11 %s 2>&1 | FileCheck %s -// RUN: %clang_cc1 -pedantic -Wall 
-Wno-unused-but-set-variable -Wno-comment -verify -fcxx-exceptions -x c++ -std=c++11 %s +// RUN: not %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -fcxx-exceptions -fixit -x c++ -std=c++98 -Wno-c++14-extensions %t-98 +// RUN: %clang_cc1 -fsyntax-only -pedantic -Wall -Wno-unused-but-set-variable -Werror -Wno-comment -fcxx-exceptions -x c++ -std=c++98 -Wno-c++14-extensions %t-98 +// RUN: not %clang_cc1 -fsyntax-only -pedantic -fdiagnostics-parseable-fixits -x c++ -std=c++11 -Wno-c++14-extensions %s 2>&1 | FileCheck %s +// RUN: %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -verify -fcxx-exceptions -x c++ -std=c++11 -Wno-c++14-extensions %s // RUN: cp %s %t-11 -// RUN: not %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -fcxx-exceptions -fixit -x c++ -std=c++11 %t-11 -// RUN: %clang_cc1 -fsyntax-only -pedantic -Wall -Wno-unused-but-set-variable -Werror -Wno-comment -fcxx-exceptions -x c++ -std=c++11 %t-11 +// RUN: not %clang_cc1 -pedantic -Wall -Wno-unused-but-set-variable -Wno-comment -fcxx-exceptions -fixit -x c++ -std=c++11 -Wno-c++14-extensions %t-11 +// RUN: %clang_cc1 -fsyntax-only -pedantic -Wall -Wno-unused-but-set-variable -Werror -Wno-comment -fcxx-exceptions -x c++ -std=c++11 -Wno-c++14-extensions %t-11 /* This is a test of the various code modification hints that are provided as part of warning or extension diagnostics. 
All of the @@ -292,21 +292,21 @@ namespace greatergreater { template> struct TemplateTemplateParam; // expected-error {{requires 'class'}} - template void t(); + template int t = 0; void g() { - void (*p)() = &t; - (void)(&t==p); // expected-error {{use '> ='}} - (void)(&t>=p); // expected-error {{use '> >'}} + int p = 0; + (void)(t==p); // expected-error {{use '> ='}} + (void)(t>=p); // expected-error {{use '> >'}} #if __cplusplus < 201103L - (void)(&t>>=p); // expected-error {{use '> >'}} - (Shr)&t>>>=p; // expected-error {{use '> >'}} + (void)(t>>=p); // expected-error {{use '> >'}} + (Shr)t>>>=p; // expected-error {{use '> >'}} #endif - // FIXME: We correct this to '&t > >= p;' not '&t >>= p;' - //(Shr)&t>>=p; + // FIXME: We correct this to 't > >= p;' not 't >>= p;' + //(Shr)t>>=p; // FIXME: The fix-its here overlap. - //(void)(&t>==p); + //(void)(t>==p); } } diff --git a/clang/test/Frontend/backend-diagnostic.c b/clang/test/Frontend/backend-diagnostic.c index 01029d7f83d66..695158cdd186e 100644 --- a/clang/test/Frontend/backend-diagnostic.c +++ b/clang/test/Frontend/backend-diagnostic.c @@ -15,9 +15,9 @@ extern void doIt(char *); -// REGULAR: warning: stack frame size of {{[0-9]+}} bytes in function 'stackSizeWarning' -// PROMOTE: error: stack frame size of {{[0-9]+}} bytes in function 'stackSizeWarning' -// IGNORE-NOT: stack frame size of {{[0-9]+}} bytes in function 'stackSizeWarning' +// REGULAR: warning: stack frame size ([[#]]) exceeds limit ([[#]]) in function 'stackSizeWarning' +// PROMOTE: error: stack frame size ([[#]]) exceeds limit ([[#]]) in function 'stackSizeWarning' +// IGNORE-NOT: stack frame size ([[#]]) exceeds limit ([[#]]) in function 'stackSizeWarning' void stackSizeWarning() { char buffer[80]; doIt(buffer); diff --git a/clang/test/Frontend/fwarn-stack-size.c b/clang/test/Frontend/fwarn-stack-size.c new file mode 100644 index 0000000000000..7bffbbd5b0b36 --- /dev/null +++ b/clang/test/Frontend/fwarn-stack-size.c @@ -0,0 +1,4 @@ +// RUN: 
%clang_cc1 -fwarn-stack-size=42 -emit-llvm -o - %s | FileCheck %s +void foo(void) {} +// CHECK: define {{.*}} @foo() [[ATTR:#[0-9]+]] { +// CHECK: attributes [[ATTR]] = {{.*}} "warn-stack-size"="42" diff --git a/clang/test/Headers/hip-header.hip b/clang/test/Headers/hip-header.hip index 323138613055f..0e95d58d55700 100644 --- a/clang/test/Headers/hip-header.hip +++ b/clang/test/Headers/hip-header.hip @@ -8,12 +8,49 @@ // RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ // RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ // RUN: -internal-isystem %S/Inputs/include \ +// RUN: -include cmath \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D__HIPCC_RTC__ | FileCheck %s -check-prefixes=AMD_BOOL_RETURN +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ +// RUN: -include cmath \ +// RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ +// RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ +// RUN: -D__HIPCC_RTC__ -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s -check-prefixes=AMD_INT_RETURN +// RUN: %clang_cc1 -include __clang_hip_runtime_wrapper.h \ +// RUN: -internal-isystem %S/../../lib/Headers/cuda_wrappers \ +// RUN: -internal-isystem %S/Inputs/include \ // RUN: -triple amdgcn-amd-amdhsa -aux-triple x86_64-unknown-unknown \ // RUN: -target-cpu gfx906 -emit-llvm %s -fcuda-is-device -o - \ // RUN: -D__HIPCC_RTC__ -std=c++14 | FileCheck -check-prefixes=CHECK,CXX14 %s // expected-no-diagnostics +// Check support for pure and deleted virtual functions +struct base { + __host__ + __device__ + virtual void pv() = 0; + __host__ + __device__ + virtual void dv() = delete; +}; +struct derived:base { + __host__ + __device__ + virtual void pv() override {}; +}; +__device__ void test_vf() { + derived d; +} +// CHECK: 
@_ZTV7derived = linkonce_odr unnamed_addr addrspace(1) constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (void (%struct.derived*)* @_ZN7derived2pvEv to i8*), i8* bitcast (void ()* @__cxa_deleted_virtual to i8*)] }, comdat, align 8 +// CHECK: @_ZTV4base = linkonce_odr unnamed_addr addrspace(1) constant { [4 x i8*] } { [4 x i8*] [i8* null, i8* null, i8* bitcast (void ()* @__cxa_pure_virtual to i8*), i8* bitcast (void ()* @__cxa_deleted_virtual to i8*)] }, comdat, align 8 + +// CHECK: define{{.*}}void @__cxa_pure_virtual() +// CHECK: define{{.*}}void @__cxa_deleted_virtual() + struct Number { __device__ Number(float _x) : x(_x) {} float x; @@ -61,3 +98,20 @@ __device__ float test_floor() { __device__ float test_max() { return max(5, 6.0); } + +// CHECK-LABEL: define{{.*}}@_Z10test_isnanv +__device__ double test_isnan() { + double r = 0; + double d = 5.0; + float f = 5.0; + + // AMD_INT_RETURN: call i32 @__ocml_isnan_f32(float + // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f32(float + r += isnan(f); + + // AMD_INT_RETURN: call i32 @__ocml_isnan_f64(double + // AMD_BOOL_RETURN: call i32 @__ocml_isnan_f64(double + r += isnan(d); + + return r ; +} diff --git a/clang/test/Headers/openmp_device_math_isnan.cpp b/clang/test/Headers/openmp_device_math_isnan.cpp index 35443dbdebea6..7a75e4250c951 100644 --- a/clang/test/Headers/openmp_device_math_isnan.cpp +++ b/clang/test/Headers/openmp_device_math_isnan.cpp @@ -1,11 +1,19 @@ // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple 
nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=AMD_BOOL_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=BOOL_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast | FileCheck %s --check-prefix=AMD_BOOL_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown 
-fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -DUSE_ISNAN_WITH_INT_RETURN // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN // RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN +// RUN: %clang_cc1 -x c++ -internal-isystem %S/Inputs/include -fopenmp -triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm-bc %s -o %t-ppc-host.bc -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN // RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple nvptx64-nvidia-cuda -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math 
-ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=INT_RETURN +// RUN: %clang_cc1 -x c++ -include __clang_openmp_device_functions.h -internal-isystem %S/../../lib/Headers/openmp_wrappers -internal-isystem %S/Inputs/include -fopenmp -triple amdgcn-amd-amdhsa -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=amdgcn-amd-amdhsa -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -ffast-math -ffp-contract=fast -DUSE_ISNAN_WITH_INT_RETURN | FileCheck %s --check-prefix=AMD_INT_RETURN // expected-no-diagnostics #include @@ -13,10 +21,14 @@ double math(float f, double d) { double r = 0; // INT_RETURN: call i32 @__nv_isnanf(float + // AMD_INT_RETURN: call i32 @_{{.*}}isnanf(float // BOOL_RETURN: call i32 @__nv_isnanf(float + // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnanf(float r += std::isnan(f); // INT_RETURN: call i32 @__nv_isnand(double + // AMD_INT_RETURN: call i32 @_{{.*}}isnand(double // BOOL_RETURN: call i32 @__nv_isnand(double + // AMD_BOOL_RETURN: call zeroext i1 @_{{.*}}isnand(double r += std::isnan(d); return r; } diff --git a/clang/test/Headers/stdarg.cpp b/clang/test/Headers/stdarg.cpp index 762c358990c2a..2278780d0c46d 100644 --- a/clang/test/Headers/stdarg.cpp +++ b/clang/test/Headers/stdarg.cpp @@ -15,7 +15,7 @@ #include -// AARCH64-C: define {{.*}} @f(i32 %n, %struct.__va_list* %list) +// AARCH64-C: define {{.*}} @f(i32 %n, %"struct.std::__va_list"* %list) // AARCH64-CXX: define {{.*}} @_Z1fiSt9__va_list(i32 %n, %"struct.std::__va_list"* %list) // X86_64-C: define {{.*}} @f(i32 %n, %struct.__va_list_tag* %list) // X86_64-CXX: define {{.*}} @_Z1fiP13__va_list_tag(i32 %n, %struct.__va_list_tag* %list) diff --git a/clang/test/Layout/dump-complete.cpp b/clang/test/Layout/dump-complete.cpp new file mode 100644 index 0000000000000..9ccbf477c7052 --- /dev/null +++ b/clang/test/Layout/dump-complete.cpp @@ -0,0 +1,18 @@ +// RUN: %clang_cc1 -emit-llvm-only -fdump-record-layouts-complete %s | FileCheck %s 
+ +struct a { + int x; +}; + +struct b { + char y; +} foo; + +class c {}; + +class d; + +// CHECK: 0 | struct a +// CHECK: 0 | struct b +// CHECK: 0 | class c +// CHECK-NOT: 0 | class d diff --git a/clang/test/Lexer/cxx-features.cpp b/clang/test/Lexer/cxx-features.cpp index 8f283dd8c8d9f..40c73f6019420 100644 --- a/clang/test/Lexer/cxx-features.cpp +++ b/clang/test/Lexer/cxx-features.cpp @@ -31,6 +31,10 @@ // --- C++2b features --- +#if check(implicit_move, 0, 0, 0, 0, 0, 202011) +#error "wrong value for __cpp_implicit_move" +#endif + #if check(size_t_suffix, 0, 0, 0, 0, 0, 202011) #error "wrong value for __cpp_size_t_suffix" #endif diff --git a/clang/test/Lexer/pragma-operators.cpp b/clang/test/Lexer/pragma-operators.cpp index 4d288c9372d64..d9c3d36d78704 100644 --- a/clang/test/Lexer/pragma-operators.cpp +++ b/clang/test/Lexer/pragma-operators.cpp @@ -19,7 +19,7 @@ B(foo) #pragma warning(pop) #define pragma_L _Pragma(L"GCC diagnostic push") -#define pragma_u8 _Pragma(u8"system_header") +#define pragma_u8 _Pragma(u8"pack(1)") #define pragma_u _Pragma(u"GCC diagnostic pop") #define pragma_U _Pragma(U"comment(lib, \"libfoo\")") #define pragma_R _Pragma(R"(clang diagnostic ignored "-Wunused")") @@ -27,7 +27,7 @@ B(foo) #define pragma_hello _Pragma(u8R"x(message R"y("Hello", world!)y")x") // CHECK: int n = // CHECK: #pragma GCC diagnostic push -// CHECK: #pragma system_header +// CHECK: #pragma pack(1) // CHECK: #pragma GCC diagnostic pop // CHECK: #pragma comment(lib, "libfoo") // CHECK: #pragma clang diagnostic ignored "-Wunused" diff --git a/clang/test/Misc/backend-resource-limit-diagnostics.cl b/clang/test/Misc/backend-resource-limit-diagnostics.cl index 6e7619babe83b..d80f44f691867 100644 --- a/clang/test/Misc/backend-resource-limit-diagnostics.cl +++ b/clang/test/Misc/backend-resource-limit-diagnostics.cl @@ -1,7 +1,7 @@ // REQUIRES: amdgpu-registered-target // RUN: not %clang_cc1 -emit-codegen-only -triple=amdgcn-- %s 2>&1 | FileCheck %s -// CHECK: error: local 
memory limit exceeded (480000) in use_huge_lds +// CHECK: error: local memory (480000) exceeds limit in function 'use_huge_lds' kernel void use_huge_lds() { volatile local int huge[120000]; diff --git a/clang/test/Misc/backend-stack-frame-diagnostics-fallback.cpp b/clang/test/Misc/backend-stack-frame-diagnostics-fallback.cpp index 01b9ff598d86a..79c6ba9b23f6d 100644 --- a/clang/test/Misc/backend-stack-frame-diagnostics-fallback.cpp +++ b/clang/test/Misc/backend-stack-frame-diagnostics-fallback.cpp @@ -12,8 +12,8 @@ namespace frameSizeThunkWarning { virtual void f(); }; - // CHECK: warning: stack frame size of {{[0-9]+}} bytes in function 'frameSizeThunkWarning::B::f' - // CHECK: warning: stack size limit exceeded ({{[0-9]+}}) in {{[^ ]+}} + // CHECK: warning: stack frame size ([[#]]) exceeds limit ([[#]]) in function 'frameSizeThunkWarning::B::f' + // CHECK: warning: stack frame size ([[#]]) exceeds limit in function '_ZTv0_n12_N21frameSizeThunkWarning1B1fEv' void B::f() { volatile int x = 0; // Ensure there is stack usage. 
} diff --git a/clang/test/Misc/backend-stack-frame-diagnostics.cpp b/clang/test/Misc/backend-stack-frame-diagnostics.cpp index b02e7f4c471d5..f0ceac00ea357 100644 --- a/clang/test/Misc/backend-stack-frame-diagnostics.cpp +++ b/clang/test/Misc/backend-stack-frame-diagnostics.cpp @@ -26,7 +26,7 @@ void frameSizeWarning(int, int) {} void frameSizeWarning(); -void frameSizeWarning() { // expected-warning-re {{stack frame size of {{[0-9]+}} bytes in function 'frameSizeWarning'}} +void frameSizeWarning() { // expected-warning-re {{stack frame size ({{[0-9]+}}) exceeds limit ({{[0-9]+}}) in function 'frameSizeWarning'}} char buffer[80]; doIt(buffer); } @@ -45,7 +45,7 @@ void frameSizeWarningIgnored() { void frameSizeLocalClassWarning() { struct S { - S() { // expected-warning-re {{stack frame size of {{[0-9]+}} bytes in function 'frameSizeLocalClassWarning()::S::S'}} + S() { // expected-warning-re {{stack frame size ({{[0-9]+}}) exceeds limit ({{[0-9]+}}) in function 'frameSizeLocalClassWarning()::S::S'}} char buffer[80]; doIt(buffer); } @@ -55,7 +55,7 @@ void frameSizeLocalClassWarning() { void frameSizeLambdaWarning() { auto fn = - []() { // expected-warning-re {{stack frame size of {{[0-9]+}} bytes in lambda expression}} + []() { // expected-warning-re {{stack frame size ({{[0-9]+}}) exceeds limit ({{[0-9]+}}) in lambda expression}} char buffer[80]; doIt(buffer); }; @@ -64,7 +64,7 @@ void frameSizeLambdaWarning() { void frameSizeBlocksWarning() { auto fn = - ^() { // expected-warning-re {{stack frame size of {{[0-9]+}} bytes in block literal}} + ^() { // expected-warning-re {{stack frame size ({{[0-9]+}}) exceeds limit ({{[0-9]+}}) in block literal}} char buffer[80]; doIt(buffer); }; diff --git a/clang/test/Misc/target-invalid-cpu-note.c b/clang/test/Misc/target-invalid-cpu-note.c index b30a2709e0797..23f9f543becea 100644 --- a/clang/test/Misc/target-invalid-cpu-note.c +++ b/clang/test/Misc/target-invalid-cpu-note.c @@ -87,7 +87,7 @@ // AMDGCN-SAME: gfx802, iceland, 
tonga, gfx803, fiji, polaris10, polaris11, // AMDGCN-SAME: gfx805, tongapro, gfx810, stoney, gfx900, gfx902, gfx904, gfx906, // AMDGCN-SAME: gfx908, gfx909, gfx90a, gfx90c, gfx1010, gfx1011, gfx1012, gfx1013, gfx1030, gfx1031, -// AMDGCN-SAME: gfx1032, gfx1033, gfx1034 +// AMDGCN-SAME: gfx1032, gfx1033, gfx1034, gfx1035 // RUN: not %clang_cc1 -triple wasm64--- -target-cpu not-a-cpu -fsyntax-only %s 2>&1 | FileCheck %s --check-prefix WEBASM // WEBASM: error: unknown target CPU 'not-a-cpu' diff --git a/clang/test/OpenMP/assumes_include_nvptx.cpp b/clang/test/OpenMP/assumes_include_nvptx.cpp index 90e7e96152c67..adc01f53263f8 100644 --- a/clang/test/OpenMP/assumes_include_nvptx.cpp +++ b/clang/test/OpenMP/assumes_include_nvptx.cpp @@ -19,8 +19,6 @@ // CHECK-DAG: declare i32 @llvm.nvvm.read.ptx.sreg.tid.x() [[attr1]] // CHECK: declare void @__kmpc_kernel_init(i32, i16) // CHECK-NOT: # -// CHECK: declare void @__kmpc_data_sharing_init_stack() -// CHECK-NOT: # // CHECK: declare float @_Z3sinf(float) [[attr2:#[0-9]*]] // CHECK: declare void @__kmpc_kernel_deinit(i16) // CHECK-NOT: # diff --git a/clang/test/OpenMP/cancel_codegen.cpp b/clang/test/OpenMP/cancel_codegen.cpp index eae4bdbaf7a7f..2b7c1d6d254d7 100644 --- a/clang/test/OpenMP/cancel_codegen.cpp +++ b/clang/test/OpenMP/cancel_codegen.cpp @@ -384,29 +384,29 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], 
i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) 
#[[ATTR2:[0-9]+]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: .cancel.continue.i: -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .omp_outlined..1.exit: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: ret i32 0 // // @@ -1004,29 +1004,29 @@ for (int i = 0; i < argc; ++i) { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* 
[[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK2: .cancel.exit.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 
// CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: .cancel.continue.i: -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .omp_outlined..1.exit: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: ret i32 0 // // @@ -1631,29 +1631,29 @@ for (int i = 0; i < argc; ++i) { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META5:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]] // CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK3-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK3: .cancel.exit.i: -// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK3: .cancel.continue.i: -// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK3: .omp_outlined..exit: -// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* 
[[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK3-NEXT: ret i32 0 // // @@ -2258,29 +2258,29 @@ for (int i = 0; i < argc; ++i) { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]] // CHECK4-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]] // CHECK4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK4-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK4: .cancel.exit.i: -// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK4: .cancel.continue.i: -// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK4: .omp_outlined..exit: -// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK4-NEXT: ret i32 0 // // @@ -2878,29 +2878,29 @@ for (int i = 0; i < argc; ++i) { // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK7-NEXT: [[TMP9:%.*]] = bitcast 
%struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: store i8* [[TMP9]], i8** 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK7-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK7-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK7-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK7: .cancel.exit.i: -// CHECK7-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK7-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK7-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK7: .cancel.continue.i: -// CHECK7-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK7-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK7-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK7: .omp_outlined..1.exit: -// CHECK7-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK7-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK7-NEXT: ret i32 0 // // @@ -3498,29 +3498,29 @@ for (int i = 0; i < argc; ++i) { // CHECK8-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK8-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK8-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // 
CHECK8-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK8-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK8-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK8: .cancel.exit.i: -// CHECK8-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK8-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK8-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK8: .cancel.continue.i: -// CHECK8-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK8-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK8-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK8: .omp_outlined..1.exit: -// CHECK8-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK8-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK8-NEXT: ret i32 0 // // @@ -4125,29 +4125,29 @@ for (int i = 0; i < argc; ++i) { // CHECK9-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK9-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK9-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK9-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK9-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK9-NEXT: store void 
(i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK9-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK9-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK9-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK9-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK9-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK9-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK9-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK9-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK9-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK9-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK9-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]] // CHECK9-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]] // CHECK9-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK9-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK9: .cancel.exit.i: -// CHECK9-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// 
CHECK9-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK9-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK9: .cancel.continue.i: -// CHECK9-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK9-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK9-NEXT: br label [[DOTOMP_OUTLINED__EXIT]] // CHECK9: .omp_outlined..exit: -// CHECK9-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK9-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK9-NEXT: ret i32 0 // // @@ -4752,29 +4752,29 @@ for (int i = 0; i < argc; ++i) { // CHECK10-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK10-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK10-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK10-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK10-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK10-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK10-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK10-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK10-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 
+// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK10-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK10-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK10-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK10-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK10-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK10-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK10-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK10-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK10-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB12:[0-9]+]]) #[[ATTR2:[0-9]+]] // CHECK10-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i32 4) #[[ATTR2]] // CHECK10-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK10-NEXT: br i1 [[TMP12]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK10: .cancel.exit.i: -// CHECK10-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK10-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK10-NEXT: br label [[DOTOMP_OUTLINED__EXIT:%.*]] // CHECK10: .cancel.continue.i: -// CHECK10-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK10-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK10-NEXT: br label 
[[DOTOMP_OUTLINED__EXIT]] // CHECK10: .omp_outlined..exit: -// CHECK10-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK10-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK10-NEXT: ret i32 0 // // diff --git a/clang/test/OpenMP/cancellation_point_codegen.cpp b/clang/test/OpenMP/cancellation_point_codegen.cpp index 6b08a096a6b9e..401dc7f977d33 100644 --- a/clang/test/OpenMP/cancellation_point_codegen.cpp +++ b/clang/test/OpenMP/cancellation_point_codegen.cpp @@ -378,36 +378,36 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// 
CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: .cancel.continue.i: // CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2]] // CHECK1-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK1-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT1_I:%.*]], label [[DOTCANCEL_CONTINUE2_I:%.*]] // 
CHECK1: .cancel.exit1.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .cancel.continue2.i: -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .omp_outlined..1.exit: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK1-NEXT: ret i32 0 // // @@ -433,29 +433,29 @@ for (int i = 0; i < argc; ++i) { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// 
CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2]] // CHECK1-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK1-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !23 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT:%.*]] // CHECK1: .cancel.continue.i: -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], 
align 4, !noalias !23 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]] // CHECK1: .omp_outlined..2.exit: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !23 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK1-NEXT: ret i32 0 // // @@ -1008,36 +1008,36 @@ for (int i = 0; i < argc; ++i) { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META4:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) +// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK2: .cancel.exit.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: .cancel.continue.i: // CHECK2-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2]] // CHECK2-NEXT: [[TMP15:%.*]] = icmp ne i32 [[TMP14]], 0 // CHECK2-NEXT: br i1 [[TMP15]], label [[DOTCANCEL_EXIT1_I:%.*]], label [[DOTCANCEL_CONTINUE2_I:%.*]] // CHECK2: .cancel.exit1.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// 
CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .cancel.continue2.i: -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .omp_outlined..1.exit: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !14 // CHECK2-NEXT: ret i32 0 // // @@ -1063,29 +1063,29 @@ for (int i = 0; i < argc; ++i) { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias 
!23 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], i32 4) #[[ATTR2]] // CHECK2-NEXT: [[TMP13:%.*]] = icmp ne i32 [[TMP12]], 0 // CHECK2-NEXT: br i1 [[TMP13]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK2: .cancel.exit.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !23 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT:%.*]] // CHECK2: .cancel.continue.i: -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !23 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // 
CHECK2-NEXT: br label [[DOTOMP_OUTLINED__2_EXIT]] // CHECK2: .omp_outlined..2.exit: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !23 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !24 // CHECK2-NEXT: ret i32 0 // // diff --git a/clang/test/OpenMP/declare_target_codegen_globalization.cpp b/clang/test/OpenMP/declare_target_codegen_globalization.cpp index 12467cf6e2c1c..acde2937a8459 100644 --- a/clang/test/OpenMP/declare_target_codegen_globalization.cpp +++ b/clang/test/OpenMP/declare_target_codegen_globalization.cpp @@ -33,7 +33,6 @@ int maini1() { // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -81,39 +80,9 @@ int maini1() { // CHECK1-LABEL: define {{[^@]+}}@_Z3barv // CHECK1-SAME: () #[[ATTR2]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[A2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB2]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]] -// CHECK1: .spmd: -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .non-spmd: -// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i64 
4, i64 128 -// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0) -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to %struct._globalized_locals_ty.0* -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[_SELECT_STACK]], i32 0, i32 0 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[A]], i32 0, i32 [[NVPTX_LANE_ID]] -// CHECK1-NEXT: [[A1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32* [[A1]], i32* [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i32* [[A2]], i32* [[TMP10]] -// CHECK1-NEXT: [[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[TMP11]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 [[CALL]], i32* [[RETVAL]], align 4 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTEXIT4:%.*]], label [[DOTNON_SPMD3:%.*]] -// CHECK1: .non-spmd3: -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8* -// CHECK1-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP12]]) -// CHECK1-NEXT: br label [[DOTEXIT4]] -// CHECK1: .exit4: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP13]] +// CHECK1-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* +// CHECK1-NEXT: 
[[CALL:%.*]] = call i32 @_Z3fooRi(i32* nonnull align 4 dereferenceable(4) [[A_ON_STACK]]) #[[ATTR4]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A]]) +// CHECK1-NEXT: ret i32 [[CALL]] // diff --git a/clang/test/OpenMP/distribute_codegen.cpp b/clang/test/OpenMP/distribute_codegen.cpp index b8d70cc948eae..234db93583a5a 100644 --- a/clang/test/OpenMP/distribute_codegen.cpp +++ b/clang/test/OpenMP/distribute_codegen.cpp @@ -596,45 +596,45 @@ int fint(void) { return ftemplate(); } // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // 
CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, 
!llvm.access.group !9 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -905,23 +905,23 @@ int fint(void) { return ftemplate(); } // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1426,45 +1426,45 @@ int fint(void) { return ftemplate(); } // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !10 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 
[[IDXPROM3]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK2-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1735,23 +1735,23 @@ int fint(void) { return ftemplate(); } // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// 
CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2248,41 +2248,41 @@ int fint(void) { return ftemplate(); } // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], 
[[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2553,23 +2553,23 @@ int fint(void) { return ftemplate(); } // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3066,41 +3066,41 @@ int fint(void) { return ftemplate(); } // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK4-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, 
!llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3371,23 +3371,23 @@ int fint(void) { return ftemplate(); } // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD3:%.*]] = 
add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3892,45 +3892,45 @@ int fint(void) { return ftemplate(); } // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK5-NEXT: [[TMP23:%.*]] = 
load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4201,23 +4201,23 @@ int fint(void) { return ftemplate(); } // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// 
CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4722,45 +4722,45 @@ int fint(void) { return ftemplate(); } // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP11:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr 
inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK6-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !10 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5031,23 +5031,23 @@ int fint(void) { return ftemplate(); } // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5544,41 +5544,41 @@ int fint(void) { return ftemplate(); } // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK7-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK7-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, 
!llvm.access.group !11 // CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK7-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -5849,23 +5849,23 @@ int fint(void) { return ftemplate(); } // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6362,41 +6362,41 @@ int fint(void) { return ftemplate(); } // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !10 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK8-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK8-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// 
CHECK8-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK8-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK8-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK8-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK8-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: 
[[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -6667,23 +6667,23 @@ int fint(void) { return ftemplate(); } // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -6996,45 +6996,45 @@ int fint(void) { return ftemplate(); } // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP14:%.*]] = load float*, 
float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, 
!llvm.access.group !11 // CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -7213,23 +7213,23 @@ int fint(void) { return ftemplate(); } // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -7535,45 +7535,45 @@ int fint(void) { return ftemplate(); } // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK18-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK18-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 
+// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK18-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK18-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK18-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK18-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK18-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK18-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK18-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK18-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], 
align 4, !llvm.access.group !11 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -7752,23 +7752,23 @@ int fint(void) { return ftemplate(); } // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* 
[[I]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -8066,41 +8066,41 @@ int fint(void) { return ftemplate(); } // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // 
CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: 
[[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -8279,23 +8279,23 @@ int fint(void) { return ftemplate(); } // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -8593,41 +8593,41 @@ int 
fint(void) { return ftemplate(); } // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK20-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK20-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP16:%.*]] = load float, float* 
[[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK20-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK20-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK20-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK20-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK20-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !12 // CHECK20-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -8806,23 +8806,23 @@ int fint(void) { return ftemplate(); } // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK20-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// 
CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -8839,4 +8839,3 @@ int fint(void) { return ftemplate(); } // CHECK20-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK20-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp index 909b338a18e5f..2034f82e25d5e 100644 --- a/clang/test/OpenMP/distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_codegen.cpp @@ -2247,48 +2247,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !12 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* 
[[ARRAYIDX7]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** 
[[TMP32]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !11 +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -2501,48 +2501,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !15 // 
CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !15 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 
dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !15 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -4040,48 +4040,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK2-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, 
!llvm.access.group !11 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK2-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !11 +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group 
!12 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -4294,48 +4294,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP21]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP28:%.*]] = load 
double*, double** [[TMP1]], align 8, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK2-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !15 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !15 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), 
!llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -5781,45 +5781,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, 
!llvm.access.group !12 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// 
CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !12 +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -6024,45 +6024,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP22:%.*]] = load 
double*, double** [[TMP2]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** 
[[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -7508,45 +7508,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group 
!12 -// CHECK4-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK4-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, 
!llvm.access.group !12 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !12 +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !13 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -7751,45 +7751,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP25:%.*]] = load 
double*, double** [[TMP3]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK4-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store 
double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -9726,39 +9726,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds 
double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !19 // CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !19 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !18 +// CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !19 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -9970,39 +9970,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 
// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* 
[[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !22 // CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !21 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store 
i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -11944,39 +11944,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// 
CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !24 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !25 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !25 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -12188,39 +12188,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, 
!llvm.access.group !28 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !27 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: 
omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -14157,39 +14157,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP21:%.*]] = load double*, 
double** [[TMP2]], align 8, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !19 // CHECK10-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK10-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !19 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[IDXPROM9:%.*]] = 
sext i32 [[TMP28]] to i64 // CHECK10-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK10-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !18 +// CHECK10-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !19 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -14401,39 +14401,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK10-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !22 // CHECK10-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// 
CHECK10-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !21 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK10-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !21 +// CHECK10-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !22 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -16375,39 +16375,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// 
CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // 
CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK10-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !24 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK10-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -16619,39 +16619,39 @@ int main() { // CHECK10-NEXT: store i32 
[[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: 
[[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !27 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: store 
i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -18526,36 +18526,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: 
[[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw 
i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -18759,36 +18759,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 +// 
CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -20668,36 +20668,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, 
!llvm.access.group !25 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: 
omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -20901,36 +20901,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP22:%.*]] = load 
i32*, i32** [[TMP2]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: store i32 [[ADD7]], i32* 
[[ARRAYIDX8]], align 4, !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -22805,36 +22805,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// 
CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK12-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = 
getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK12-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -23038,36 +23038,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// 
CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: 
[[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -24947,36 +24947,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 
[[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], 
align 4, !llvm.access.group !25 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -25180,36 +25180,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !29 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !29 // 
CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -25226,4 +25226,3 @@ int main() { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// diff --git a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp index 6f70fa8d83e5a..dec264ff9ea1c 100644 --- a/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp +++ 
b/clang/test/OpenMP/distribute_parallel_for_reduction_task_codegen.cpp @@ -548,26 +548,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -601,8 +601,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -1167,26 +1167,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1220,8 +1220,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1273,4 +1273,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp index 328c620679a86..c9a16f2db634c 100644 --- a/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_parallel_for_simd_codegen.cpp @@ -876,23 +876,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// 
CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -993,48 +993,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], 
align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, 
double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !14 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// 
CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1140,23 +1140,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to 
i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !19 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1257,48 +1257,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], 
i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[I4]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], 
%class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !22 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1411,47 +1411,47 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !25 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK1: cond.true10: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END12:%.*]] // CHECK1: cond.false11: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK1-NEXT: br label [[COND_END12]] // CHECK1: cond.end12: // CHECK1-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: 
omp.loop.exit: @@ -1552,48 +1552,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** 
[[TMP3]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !28 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, 
!llvm.access.group !28 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !28 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !28 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1699,23 +1699,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br 
i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1816,48 +1816,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: 
[[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK1-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !34 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !34 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !34 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1973,27 +1973,27 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !37 // CHECK1-NEXT: [[CONV:%.*]] = 
bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !37 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !37 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP38:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2108,48 +2108,48 @@ int main() { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, 
!llvm.access.group !40 +// CHECK1-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK1-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8 +// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP33]], 
align 8 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 8, !llvm.access.group !40 // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8, !llvm.access.group !40 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !40 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -2265,23 +2265,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 
// CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 // CHECK1-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !43 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2379,48 +2379,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// 
CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !46 // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// 
CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !46 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !46 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !46 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !46 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !23 +// CHECK1-NEXT: 
store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !46 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !46 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -2535,27 +2535,27 @@ int main() { // CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !49 // 
CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 // CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !49 // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !49 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !49 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !49 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2657,48 +2657,48 @@ int main() { // CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: 
omp.inner.for.body: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !52 // CHECK1-NEXT: [[ADD10:%.*]] = fadd double 
[[TMP24]], [[TMP27]] -// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !27 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !27 +// CHECK1-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !52 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !27 +// CHECK1-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !52 // CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !27 +// CHECK1-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !52 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !27 +// CHECK1-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !52 // CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !27 -// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), 
!llvm.access.group !27 +// CHECK1-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !52 +// CHECK1-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !52 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -2837,23 +2837,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 
4, !llvm.access.group !10 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: 
omp.loop.exit: @@ -2954,48 +2954,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** 
[[TMP3]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !14 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, 
!llvm.access.group !14 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3101,23 +3101,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 
[[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !19 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3218,48 +3218,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: 
[[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !22 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !22 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3372,47 +3372,47 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !25 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: 
store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK2: cond.true10: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[COND_END12:%.*]] // CHECK2: cond.false11: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK2-NEXT: br label [[COND_END12]] // CHECK2: cond.end12: // CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !25 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3513,48 +3513,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 
[[IDXPROM]] -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !28 // CHECK2-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !28 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !28 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !28 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* 
[[I4]], i32** [[TMP29]], align 8 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !28 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !28 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !28 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !28 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3660,23 +3660,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !31 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3777,48 +3777,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !34 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !34 // CHECK2-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !34 +// CHECK2-NEXT: 
[[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK2-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !34 // CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 8, !llvm.access.group !34 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP29]], align 8, !llvm.access.group !34 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 8, !llvm.access.group !34 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 8, !llvm.access.group !34 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !34 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3934,27 +3934,27 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !37 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !37 // CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !37 // CHECK2-NEXT: [[CONV:%.*]] = 
bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !37 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !37 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !37 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !37 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP38:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -4069,48 +4069,48 @@ int main() { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, 
!llvm.access.group !40 +// CHECK2-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK2-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8 +// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP32]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP33]], 
align 8 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 8, !llvm.access.group !40 // CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 8 dereferenceable(32) [[REF_TMP]]) +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 8, !llvm.access.group !40 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !40 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -4226,23 +4226,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 
// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !43 // CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !43 // CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !43 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !43 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -4340,48 +4340,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// 
CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !46 // CHECK2-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// 
CHECK2-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !46 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK2-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !46 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 8, !llvm.access.group !46 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store i32* [[I4]], i32** [[TMP30]], align 8, !llvm.access.group !46 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 8, !llvm.access.group !46 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !23 +// CHECK2-NEXT: 
store double** [[TMP3]], double*** [[TMP32]], align 8, !llvm.access.group !46 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !46 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -4496,27 +4496,27 @@ int main() { // CHECK2-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !49 // 
CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !49 // CHECK2-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !49 // CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !49 +// CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !49 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !49 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !49 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -4618,48 +4618,48 @@ int main() { // CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: 
omp.inner.for.body: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !52 // CHECK2-NEXT: [[ADD10:%.*]] = fadd double 
[[TMP24]], [[TMP27]] -// CHECK2-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !27 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !52 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK2-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !27 +// CHECK2-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !52 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !27 +// CHECK2-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 8, !llvm.access.group !52 // CHECK2-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !27 +// CHECK2-NEXT: store i32* [[I6]], i32** [[TMP31]], align 8, !llvm.access.group !52 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !27 +// CHECK2-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 8, !llvm.access.group !52 // CHECK2-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !27 -// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), 
!llvm.access.group !27 +// CHECK2-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 8, !llvm.access.group !52 +// CHECK2-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 8 dereferenceable(32) [[REF_TMP]]), !llvm.access.group !52 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -4797,21 +4797,21 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 
4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4910,45 +4910,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds 
double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) 
[[REF_TMP]]) +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5053,21 +5053,21 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, 
i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5166,45 +5166,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: 
omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: 
[[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double** [[TMP3]], double*** 
[[TMP31]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5315,45 +5315,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 
[[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[COND_END12:%.*]] // CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[COND_END12]] // CHECK3: cond.end12: // CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5452,45 +5452,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// 
CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5595,21 +5595,21 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !32 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5708,45 +5708,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds 
double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 
[[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5860,24 +5860,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5986,45 +5986,45 @@ int main() { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// 
CHECK3-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] -// CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 +// CHECK3-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[TMP34:%.*]] = getelementptr inbounds 
[[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !41 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -6139,21 +6139,21 @@ int main() { // CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !44 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // 
CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -6249,45 +6249,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !24 -// 
CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !47 // 
CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !24 +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !47 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -6400,24 +6400,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !50 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -6516,45 +6516,45 @@ int main() { // CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: 
omp.inner.for.body: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !28 -// 
CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !28 +// CHECK3-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull 
align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !53 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -6692,21 +6692,21 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6805,45 +6805,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// 
CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: [[TMP25:%.*]] = load 
double, double* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, 
!llvm.access.group !15 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !15 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6948,21 +6948,21 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7061,45 +7061,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// 
CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: [[TMP25:%.*]] = load 
double, double* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_1]], %class.anon.1* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, 
!llvm.access.group !23 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE0_clEv"(%class.anon.1* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7210,45 +7210,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD7]], 
i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK4: cond.true10: -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[COND_END12:%.*]] // CHECK4: cond.false11: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[COND_END12]] // CHECK4: cond.end12: // CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK4-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: 
store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7347,45 +7347,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, 
i32* [[I3]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* 
[[I3]], i32** [[TMP29]], align 4 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_2]], %class.anon.2* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE1_clEv"(%class.anon.2* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7490,21 +7490,21 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !32 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7603,45 +7603,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds 
double, double* [[TMP26]], i32 [[TMP27]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP28]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP29]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP30]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_3]], %class.anon.3* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP31]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE2_clEv"(%class.anon.3* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK4-NEXT: store i32 
[[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7755,24 +7755,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7881,45 +7881,45 @@ int main() { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// 
CHECK4-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK4-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK4-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] -// CHECK4-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 +// CHECK4-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP31]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4 +// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP32]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP33]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds 
[[CLASS_ANON_4]], %class.anon.4* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 4 dereferenceable(16) [[REF_TMP]]) +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP34]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE3_clEv"(%class.anon.4* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !41 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -8034,21 +8034,21 @@ int main() { // CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !44 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // 
CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -8144,45 +8144,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !24 -// 
CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK4-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP29]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store i32* [[I3]], i32** [[TMP30]], align 4, !llvm.access.group !47 // 
CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP31]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_5]], %class.anon.5* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !24 +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP32]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE4_clEv"(%class.anon.5* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !47 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -8295,24 +8295,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !50 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -8411,45 +8411,45 @@ int main() { // CHECK4-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: 
omp.inner.for.body: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK4-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !28 -// 
CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 0 -// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store double** [[TMP1]], double*** [[TMP30]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 1 -// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store i32* [[I4]], i32** [[TMP31]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 2 -// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: store double** [[TMP2]], double*** [[TMP32]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [[CLASS_ANON_6]], %class.anon.6* [[REF_TMP]], i32 0, i32 3 -// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !28 +// CHECK4-NEXT: store double** [[TMP3]], double*** [[TMP33]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE5_clEv"(%class.anon.6* nonnull 
align 4 dereferenceable(16) [[REF_TMP]]), !llvm.access.group !53 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP34]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -9186,23 +9186,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9302,39 +9302,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: 
[[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !21 // CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9440,23 +9440,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !26 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9556,39 +9556,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !29 // CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !29 +// CHECK9-NEXT: 
[[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !29 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9701,47 +9701,47 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !32 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD6]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !32 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: // CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9841,39 +9841,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// 
CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !35 // CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !35 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store double [[ADD9]], 
double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9979,23 +9979,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !38 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -10095,39 +10095,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: 
omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to 
i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !41 // CHECK9-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !41 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK9-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !41 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -10243,27 +10243,27 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: 
-// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !44 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !44 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -10377,39 +10377,39 @@ int main() { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK9-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 // CHECK9-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !47 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK9-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -10525,23 +10525,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !50 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -10638,39 +10638,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// 
CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !53 // CHECK9-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// 
CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !30 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !53 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !30 +// CHECK9-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !53 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -10785,27 +10785,27 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !56 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !56 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !56 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !56 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -10906,39 +10906,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: 
omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !59 // CHECK9-NEXT: [[ADD10:%.*]] = fadd double 
[[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !34 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !59 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !59 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -11557,23 +11557,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !62 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !62 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !62 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -11673,39 +11673,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !65 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK9-NEXT: 
[[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !65 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -11811,23 +11811,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group 
!68 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !68 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !68 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -11927,39 +11927,39 @@ int main() { // 
CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 
4, !llvm.access.group !71 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !71 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !71 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -12072,47 +12072,47 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !74 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK9-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK9-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK9: cond.true10: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK9-NEXT: br label [[COND_END12:%.*]] // CHECK9: cond.false11: -// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 // CHECK9-NEXT: br label [[COND_END12]] // CHECK9: cond.end12: // CHECK9-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK9-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: 
omp.loop.exit: @@ -12212,39 +12212,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !77 +// 
CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !77 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !77 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -12350,23 +12350,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !80 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !80 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !80 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP81:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -12466,39 +12466,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !83 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK9-NEXT: 
[[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !83 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 // CHECK9-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP84:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -12614,27 +12614,27 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group 
!86 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !86 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !86 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !86 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !86 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -12748,39 +12748,39 @@ int main() { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK9-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK9-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK9-NEXT: 
[[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK9-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK9-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -12896,23 +12896,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group !92 // CHECK9-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !92 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !92 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -13009,39 +13009,39 @@ int 
main() { // CHECK9-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: 
[[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK9-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !48 -// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !95 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK9-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !95 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 // CHECK9-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK9-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD11]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP96:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -13156,27 +13156,27 @@ int main() { // CHECK9-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 // CHECK9-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !98 // CHECK9-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 // CHECK9-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !98 // CHECK9-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK9-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !98 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !98 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !98 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -13277,39 +13277,39 @@ int main() { // CHECK9-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// 
CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, 
!llvm.access.group !101 +// CHECK9-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK9-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !52 -// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !101 +// CHECK9-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !101 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 // CHECK9-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK9: 
omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -13938,23 +13938,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14054,39 +14054,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !21 // CHECK10-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 
8, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14192,23 +14192,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: 
omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !26 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !26 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !26 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !26 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14308,39 +14308,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// 
CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !29 // CHECK10-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !29 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !29 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 
[[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14453,47 +14453,47 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !32 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: 
store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK10: cond.true10: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !32 // CHECK10-NEXT: br label [[COND_END12:%.*]] // CHECK10: cond.false11: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 // CHECK10-NEXT: br label [[COND_END12]] // CHECK10: cond.end12: // CHECK10-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK10-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK10: omp.inner.for.end: // 
CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14593,39 +14593,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !35 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, 
!llvm.access.group !35 +// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !35 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !35 // CHECK10-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !35 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !35 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP36:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14731,23 +14731,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !38 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14847,39 +14847,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8 +// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX8]], align 8, !llvm.access.group !41 // CHECK10-NEXT: [[ADD9:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], 
align 8, !llvm.access.group !41 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8 +// CHECK10-NEXT: store double [[ADD9]], double* [[ARRAYIDX11]], align 8, !llvm.access.group !41 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -14995,27 +14995,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: 
omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !44 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !44 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !44 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !44 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !44 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !44 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -15129,39 +15129,39 @@ int main() { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8 -// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !47 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds double, double* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK10-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !47 // CHECK10-NEXT: [[ADD15:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 8, 
!llvm.access.group !47 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8 +// CHECK10-NEXT: store double [[ADD15]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !47 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -15277,23 +15277,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: 
omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !50 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !50 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -15390,39 +15390,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i64 [[IDXPROM]] -// 
CHECK10-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP24]], i64 [[IDXPROM6]] -// CHECK10-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX7]], align 8, !llvm.access.group !53 // CHECK10-NEXT: [[ADD8:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK10-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !30 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !30 +// CHECK10-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !53 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK10-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP27]], i64 [[IDXPROM9]] -// CHECK10-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !30 +// CHECK10-NEXT: store double [[ADD8]], double* [[ARRAYIDX10]], align 8, !llvm.access.group !53 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !30 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -15537,27 +15537,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !56 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !56 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, 
!llvm.access.group !56 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !56 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !56 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, double**, double**, double**, i64)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !56 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !56 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !56 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -15658,39 +15658,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 8, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i64 [[IDXPROM]] -// 
CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 8, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 8, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds double, double* [[TMP25]], i64 [[IDXPROM8]] -// CHECK10-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX9]], align 8, !llvm.access.group !59 // CHECK10-NEXT: [[ADD10:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK10-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !34 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 8, !llvm.access.group !59 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds double, double* [[TMP28]], i64 [[IDXPROM11]] -// CHECK10-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: store double [[ADD10]], double* [[ARRAYIDX12]], align 8, !llvm.access.group !59 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !34 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -16309,23 +16309,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !62 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !62 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !62 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !62 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -16425,39 +16425,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !65 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !65 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !65 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !65 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !65 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, 
!llvm.access.group !65 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !65 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !65 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -16563,23 +16563,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK10-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !68 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !68 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !68 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !68 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -16679,39 +16679,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !71 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* 
[[I4]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !71 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -16824,47 +16824,47 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !74 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !74 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK10-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK10-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK10: cond.true10: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !74 // CHECK10-NEXT: br label [[COND_END12:%.*]] // CHECK10: cond.false11: -// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !74 // CHECK10-NEXT: br label [[COND_END12]] // CHECK10: cond.end12: // CHECK10-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE10]] ], [ [[TMP32]], [[COND_FALSE11]] ] -// CHECK10-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK10-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -16964,39 +16964,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 
[[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !77 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], 
align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !77 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !77 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP78:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -17102,23 +17102,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !80 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !80 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !80 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !80 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !80 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP81:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -17218,39 +17218,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// 
CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[IDXPROM7:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX8:%.*]] 
= getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM7]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !83 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[IDXPROM10:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM10]] -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4 +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX11]], align 4, !llvm.access.group !83 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 // CHECK10-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !83 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP84:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -17366,27 +17366,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !86 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !86 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !86 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !86 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !86 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !86 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !86 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !86 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -17500,39 +17500,39 @@ int main() { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK10-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP24]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8 -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP27]] to i64 // CHECK10-NEXT: [[ARRAYIDX14:%.*]] = 
getelementptr inbounds i32, i32* [[TMP26]], i64 [[IDXPROM13]] -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX14]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8 -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !89 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[IDXPROM16:%.*]] = sext i32 [[TMP30]] to i64 // CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i64 [[IDXPROM16]] -// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4 +// CHECK10-NEXT: store i32 [[ADD15]], i32* [[ARRAYIDX17]], align 4, !llvm.access.group !89 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 // CHECK10-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !89 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP90:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -17648,23 +17648,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !92 // CHECK10-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !92 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !92 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !92 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !92 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP93:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -17761,39 +17761,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// 
CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP22]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[IDXPROM6:%.*]] = sext i32 [[TMP25]] to i64 // CHECK10-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i64 [[IDXPROM6]] -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX7]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK10-NEXT: [[TMP27:%.*]] = load i32*, 
i32** [[TMP1]], align 8, !llvm.access.group !48 -// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !95 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[IDXPROM9:%.*]] = sext i32 [[TMP28]] to i64 // CHECK10-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i64 [[IDXPROM9]] -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !95 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 // CHECK10-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !95 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP96:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -17908,27 +17908,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK10-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 // CHECK10-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !98 // CHECK10-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !98 // CHECK10-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !98 // CHECK10-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4 -// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]) +// CHECK10-NEXT: store i32 [[TMP23]], i32* [[CONV]], align 4, !llvm.access.group !98 +// CHECK10-NEXT: [[TMP24:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !98 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i32**, i32**, i32**, i64)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i64 [[TMP20]], i64 [[TMP22]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i64 [[TMP24]]), !llvm.access.group !98 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !98 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !98 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -18029,39 +18029,39 @@ int main() { // CHECK10-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: 
omp.inner.for.body: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 8, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP23:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i64 [[IDXPROM]] -// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 8, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP26:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP26]] to i64 // CHECK10-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i64 [[IDXPROM8]] -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX9]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// 
CHECK10-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !52 -// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 8, !llvm.access.group !101 +// CHECK10-NEXT: [[TMP29:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[IDXPROM11:%.*]] = sext i32 [[TMP29]] to i64 // CHECK10-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i64 [[IDXPROM11]] -// CHECK10-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX12]], align 4, !llvm.access.group !101 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 // CHECK10-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !101 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP102:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -18679,21 +18679,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -18791,36 +18791,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // 
CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -18925,21 +18925,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !27 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !27 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19037,36 +19037,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !30 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19177,45 +19177,45 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !33 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: 
store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: // CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19313,36 +19313,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP23:%.*]] = load 
double*, double** [[TMP3]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !36 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19447,21 +19447,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !39 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19559,36 +19559,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // 
CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19702,24 +19702,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] 
= load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !45 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !45 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -19827,36 +19827,36 @@ int main() { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK11-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] -// CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 +// CHECK11-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !48 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -19971,21 +19971,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !51 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -20080,36 +20080,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !54 +// 
CHECK11-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !54 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK11: 
omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -20222,24 +20222,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !57 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !57 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -20337,36 +20337,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !35 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 
-// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !35 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !35 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !60 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !35 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -20974,21 +20974,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !63 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !63 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21086,36 +21086,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] 
-// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !66 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP67:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21220,21 +21220,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !69 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !69 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21332,36 +21332,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// 
CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !72 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21472,45 +21472,45 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !75 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK11-NEXT: store i32 [[ADD8]], 
i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK11-NEXT: br label [[COND_END12:%.*]] // CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 // CHECK11-NEXT: br label [[COND_END12]] // CHECK11: cond.end12: // CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP76:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21608,36 +21608,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !78 +// 
CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !78 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21742,21 +21742,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// 
CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !81 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !81 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21854,36 +21854,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] 
-// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !84 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -21997,24 +21997,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* 
[[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !87 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !87 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -22122,36 +22122,36 @@ int main() { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] -// CHECK11-NEXT: 
store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4 +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !90 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 // CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -22266,21 +22266,21 @@ int main() { // CHECK11-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !93 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !93 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -22375,36 +22375,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, 
i32* [[I3]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !96 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -22517,24 +22517,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP16]], i32* 
[[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !99 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !99 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK11-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -22632,36 +22632,36 @@ int main() { // CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP25:%.*]] = load 
i32*, i32** [[TMP3]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !102 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 // CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP54:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -23279,21 +23279,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -23391,36 +23391,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !22 // 
CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -23525,21 +23525,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !27 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !27 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -23637,36 +23637,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = 
getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !30 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -23777,45 +23777,45 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]]), !llvm.access.group !33 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK12-NEXT: 
store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK12-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK12: cond.true10: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[COND_END12:%.*]] // CHECK12: cond.false11: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 // CHECK12-NEXT: br label [[COND_END12]] // CHECK12: cond.end12: // CHECK12-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK12-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK12: omp.inner.for.end: // 
CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -23913,36 +23913,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP23:%.*]] = load 
double*, double** [[TMP3]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !36 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24047,21 +24047,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* 
[[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !39 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !39 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24159,36 +24159,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP20:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !42 // 
CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !42 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24302,24 +24302,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] 
= load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !45 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !45 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24427,36 +24427,36 @@ int main() { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP23:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP26:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load double, double* [[ARRAYIDX8]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[ADD9:%.*]] = fadd double [[TMP25]], [[TMP28]] -// CHECK12-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !48 // CHECK12-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds double, double* [[TMP29]], i32 [[TMP30]] -// CHECK12-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4 +// CHECK12-NEXT: store double [[ADD9]], double* [[ARRAYIDX10]], align 4, !llvm.access.group !48 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK12-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -24571,21 +24571,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**)* @.omp_outlined..19 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], double** [[TMP1]], double** [[TMP2]], double** [[TMP3]]), !llvm.access.group !51 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !51 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24680,36 +24680,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // 
CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP21:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP21]], i32 [[TMP22]] -// CHECK12-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP23:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !54 +// 
CHECK12-NEXT: [[TMP24:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds double, double* [[TMP24]], i32 [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP26:%.*]] = load double, double* [[ARRAYIDX5]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD6:%.*]] = fadd double [[TMP23]], [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds double, double* [[TMP27]], i32 [[TMP28]] -// CHECK12-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: store double [[ADD6]], double* [[ARRAYIDX7]], align 4, !llvm.access.group !54 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK12: 
omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -24822,24 +24822,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, double**, double**, double**, i32)* @.omp_outlined..23 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], double** [[TMP2]], double** [[TMP3]], double** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !57 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !57 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// 
CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -24937,36 +24937,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !35 -// CHECK12-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !35 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP22:%.*]] = load double*, double** [[TMP2]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds double, double* [[TMP22]], i32 [[TMP23]] -// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !35 
-// CHECK12-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !35 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP25:%.*]] = load double*, double** [[TMP3]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds double, double* [[TMP25]], i32 [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP27:%.*]] = load double, double* [[ARRAYIDX6]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[ADD7:%.*]] = fadd double [[TMP24]], [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !35 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP28:%.*]] = load double*, double** [[TMP1]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds double, double* [[TMP28]], i32 [[TMP29]] -// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: store double [[ADD7]], double* [[ARRAYIDX8]], align 4, !llvm.access.group !60 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !35 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !60 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP61:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -25574,21 +25574,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !63 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !63 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..27 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !63 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !63 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !63 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -25686,36 +25686,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] 
-// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !66 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !66 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP67:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -25820,21 +25820,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !69 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !69 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..31 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !69 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !69 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !69 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -25932,36 +25932,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// 
CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !72 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !72 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP73:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26072,45 +26072,45 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]]), !llvm.access.group !75 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !75 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK12-NEXT: store i32 [[ADD8]], 
i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK12-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK12-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK12: cond.true10: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !llvm.access.group !75 // CHECK12-NEXT: br label [[COND_END12:%.*]] // CHECK12: cond.false11: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 // CHECK12-NEXT: br label [[COND_END12]] // CHECK12: cond.end12: // CHECK12-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE10]] ], [ [[TMP30]], [[COND_FALSE11]] ] -// CHECK12-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK12-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !75 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP76:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26208,36 +26208,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !78 +// 
CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !78 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !78 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP79:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26342,21 +26342,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// 
CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !81 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !81 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..39 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !81 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !81 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !81 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP82:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26454,36 +26454,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP20]], i32 [[TMP21]] -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] 
-// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !84 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP28]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !84 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26597,24 +26597,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: store i32 [[TMP21]], i32* 
[[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..43 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !87 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !87 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !87 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP88:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26722,36 +26722,36 @@ int main() { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP20]], [[TMP21]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP22]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP23]], i32 [[TMP24]] -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP26]], i32 [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[ARRAYIDX8]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP28]] -// CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4 -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds i32, i32* [[TMP29]], i32 [[TMP30]] -// CHECK12-NEXT: 
store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4 +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[ARRAYIDX10]], align 4, !llvm.access.group !90 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 // CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK12-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !90 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP91:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -26866,21 +26866,21 @@ int main() { // CHECK12-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]) +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !93 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !93 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**)* @.omp_outlined..47 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32** [[TMP1]], i32** [[TMP2]], i32** [[TMP3]]), !llvm.access.group !93 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !93 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !93 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP94:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -26975,36 +26975,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK12-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP21]], i32 [[TMP22]] -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, 
i32* [[I3]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, i32* [[TMP24]], i32 [[TMP25]] -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[ARRAYIDX5]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP23]], [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i32, i32* [[TMP27]], i32 [[TMP28]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX7]], align 4, !llvm.access.group !96 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP29]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !96 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP97:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -27117,24 +27117,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP16]], i32* 
[[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: store i32 [[TMP21]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 7, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32**, i32**, i32**, i32)* @.omp_outlined..51 to void (i32*, i32*, ...)*), i32 [[TMP19]], i32 [[TMP20]], i32* [[TMP1]], i32** [[TMP2]], i32** [[TMP3]], i32** [[TMP4]], i32 [[TMP22]]), !llvm.access.group !99 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !99 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !99 +// CHECK12-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP100:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -27232,36 +27232,36 @@ int main() { // CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP19]], [[TMP20]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP21]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP2]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP22]], i32 [[TMP23]] -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP25:%.*]] = load 
i32*, i32** [[TMP3]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP3]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i32, i32* [[TMP25]], i32 [[TMP26]] -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[ARRAYIDX6]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], [[TMP27]] -// CHECK12-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP1]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i32, i32* [[TMP28]], i32 [[TMP29]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX8]], align 4, !llvm.access.group !102 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 // CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP30]], 1 -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP54:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !102 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP103:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: diff --git a/clang/test/OpenMP/distribute_simd_codegen.cpp b/clang/test/OpenMP/distribute_simd_codegen.cpp index 1edf8c50d12ca..d79f71d8fd1a9 100644 --- a/clang/test/OpenMP/distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/distribute_simd_codegen.cpp @@ -273,45 +273,45 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, 
!llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, 
!llvm.access.group !8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -491,7 +491,7 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -640,45 +640,45 @@ int fint(void) { return ftemplate(); } // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !14 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK1-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP18:%.*]] = load 
i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !17 // CHECK1-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -845,26 +845,26 @@ int fint(void) { return ftemplate(); } // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !20 // CHECK1-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP17]], 1 // CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK1-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK1-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -994,23 +994,23 @@ int fint(void) { return ftemplate(); } // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !23 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1171,45 +1171,45 @@ int fint(void) { return ftemplate(); } // CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: 
[[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK2-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1389,7 +1389,7 @@ int fint(void) { return ftemplate(); } // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP23]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1538,45 +1538,45 @@ int fint(void) { return ftemplate(); } // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK2-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK2-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK2-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1743,26 +1743,26 @@ int fint(void) { return ftemplate(); } // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP8:%.*]] 
= icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !20 // CHECK2-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK2-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK2-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1892,23 +1892,23 @@ int fint(void) { return ftemplate(); } // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2069,41 +2069,41 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = 
load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2279,7 +2279,7 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2428,41 +2428,41 @@ int fint(void) { return ftemplate(); } // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK3-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// 
CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2629,26 +2629,26 @@ int fint(void) { return ftemplate(); } // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 // CHECK3-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK3-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK3-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2778,23 +2778,23 @@ int fint(void) { return ftemplate(); } // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !19 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2955,41 +2955,41 @@ int fint(void) { return ftemplate(); } // CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// 
CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: 
store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3165,7 +3165,7 @@ int fint(void) { return ftemplate(); } // CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3314,41 +3314,41 @@ int fint(void) { return ftemplate(); } // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK4-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !15 -// 
CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 
4, !llvm.access.group !18 // CHECK4-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK4-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3515,26 +3515,26 @@ int fint(void) { return ftemplate(); } // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK4-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 // CHECK4-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK4-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK4-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3664,23 +3664,23 @@ int fint(void) { return ftemplate(); } // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3841,45 +3841,45 @@ int fint(void) { return 
ftemplate(); } // CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: 
[[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4030,7 +4030,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !12 +// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !15 // CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] @@ -4047,7 +4047,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] // CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 // CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !12 +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !15 // CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 // CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] @@ -4059,7 +4059,7 @@ int fint(void) { 
return ftemplate(); } // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4208,45 +4208,45 @@ int fint(void) { return ftemplate(); } // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP15:%.*]] = load 
i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK5-NEXT: 
[[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !15 -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4417,26 +4417,26 @@ int fint(void) { return ftemplate(); } // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP16:%.*]] = load 
i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK5-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 // CHECK5-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK5-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK5-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !12 +// CHECK5-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !15, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: @@ -4461,7 +4461,7 @@ int fint(void) { return ftemplate(); } // CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // 
CHECK5-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK5: omp.inner.for.end23: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -4593,23 +4593,23 @@ int fint(void) { return ftemplate(); } // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4770,45 +4770,45 @@ int fint(void) { return ftemplate(); } // CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] 
-// CHECK6-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK6-NEXT: 
[[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK6-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK6-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -4959,7 +4959,7 @@ int fint(void) { return ftemplate(); } // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK6-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !12 +// CHECK6-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !15 // CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] @@ -4976,7 +4976,7 @@ int fint(void) { return ftemplate(); } // CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] // CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 // CHECK6-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK6-NEXT: 
[[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !12 +// CHECK6-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !15 // CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK6-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 // CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] @@ -4988,7 +4988,7 @@ int fint(void) { return ftemplate(); } // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5137,45 +5137,45 @@ int fint(void) { return ftemplate(); } // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK6-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 
4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK6-NEXT: 
[[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !15 -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK6-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5346,26 +5346,26 
@@ int fint(void) { return ftemplate(); } // CHECK6: omp_if.then: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK6-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 // CHECK6-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK6-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK6-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !12 +// CHECK6-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !15, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD12]], 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_IF_END:%.*]] // CHECK6: omp_if.else: @@ -5390,7 +5390,7 @@ int fint(void) { return ftemplate(); } // CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK6-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK6: omp.inner.for.end23: // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: @@ -5522,23 +5522,23 @@ int fint(void) { return ftemplate(); } // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
// CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5699,41 +5699,41 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], 
align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK7-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: 
[[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK7-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK7-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -5884,7 +5884,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK7-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !13 +// CHECK7-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !16 // CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] // CHECK7-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 @@ -5898,7 +5898,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* 
[[TMP18]], i32 [[TMP19]] // CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 // CHECK7-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK7-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !13 +// CHECK7-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !16 // CHECK7-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK7-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] // CHECK7-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 @@ -5909,7 +5909,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6058,41 +6058,41 @@ int fint(void) { return ftemplate(); } // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 // CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK7-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK7-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP21:%.*]] = load i32, 
i32* [[I]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK7-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK7-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6263,26 +6263,26 @@ int fint(void) { return ftemplate(); } // CHECK7: omp_if.then: // 
CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK7-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK7-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK7-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK7-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK7-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK7-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !13 +// CHECK7-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK7-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: @@ -6307,7 +6307,7 @@ int fint(void) { return ftemplate(); } // CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK7-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end23: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -6439,23 +6439,23 @@ int fint(void) { return ftemplate(); } // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6616,41 +6616,41 @@ int fint(void) { return ftemplate(); } // CHECK8-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], 
align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK8-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK8-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK8-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK8-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK8-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, 
!llvm.access.group !9 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK8-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK8-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -6801,7 +6801,7 @@ int fint(void) { return ftemplate(); } // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK8-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK8-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !13 +// CHECK8-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !16 // CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] // CHECK8-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 @@ -6815,7 +6815,7 @@ int fint(void) { return ftemplate(); } // CHECK8-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] // CHECK8-NEXT: [[TMP20:%.*]] = 
load float, float* [[ARRAYIDX4]], align 4 // CHECK8-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK8-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !13 +// CHECK8-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !16 // CHECK8-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK8-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] // CHECK8-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 @@ -6826,7 +6826,7 @@ int fint(void) { return ftemplate(); } // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -6975,41 +6975,41 @@ int fint(void) { return ftemplate(); } // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[MUL:%.*]] = mul 
i32 [[TMP13]], 127 // CHECK8-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK8-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK8-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK8-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK8-NEXT: 
[[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK8-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK8-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK8-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK8-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK8-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7180,26 +7180,26 @@ int fint(void) { return ftemplate(); } // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: 
omp.inner.for.cond: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK8-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK8-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK8-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK8-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK8-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK8-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !13 +// CHECK8-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK8-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] 
// CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] // CHECK8: omp_if.else: @@ -7224,7 +7224,7 @@ int fint(void) { return ftemplate(); } // CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK8-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK8: omp.inner.for.end23: // CHECK8-NEXT: br label [[OMP_IF_END]] // CHECK8: omp_if.end: @@ -7356,23 +7356,23 @@ int fint(void) { return ftemplate(); } // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -10145,45 +10145,45 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: 
[[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK17-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK17-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float*, float** 
[[TMP0]], align 8 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK17-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK17-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -10299,7 +10299,7 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -10384,45 +10384,45 @@ int fint(void) { return ftemplate(); } // 
CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK17-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK17-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: 
[[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK17-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* 
[[TMP23]], i64 [[IDXPROM9]] -// CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -10525,26 +10525,26 @@ int fint(void) { return ftemplate(); } // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK17-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, 
!llvm.access.group !21 // CHECK17-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK17-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK17-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK17-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -10632,23 +10632,23 @@ int fint(void) { return ftemplate(); } // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 
[[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -10738,45 +10738,45 @@ int fint(void) { return ftemplate(); } // CHECK18-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK18-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK18-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK18-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 // CHECK18-NEXT: 
[[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK18-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK18-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK18-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 +// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK18-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK18-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK18-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD10]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -10892,7 +10892,7 @@ int fint(void) { return ftemplate(); } // CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -10977,45 +10977,45 @@ int fint(void) { return ftemplate(); } // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK18-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 -// 
CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK18-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK18-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK18-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK18-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK18-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds 
float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK18-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK18-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK18-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK18-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -11118,26 +11118,26 @@ int fint(void) { return ftemplate(); } // CHECK18-NEXT: store i32 
[[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK18-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK18-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !21 // CHECK18-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK18-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK18-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK18-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK18-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -11225,23 +11225,23 @@ int fint(void) { return ftemplate(); } // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP19:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -11331,41 +11331,41 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK19-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = 
load float*, float** [[TMP2]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK19-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK19-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -11477,7 +11477,7 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -11562,41 +11562,41 @@ int fint(void) { return ftemplate(); } // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK19-NEXT: br i1 [[CMP2]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK19-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK19-NEXT: [[TMP20:%.*]] = load 
float*, float** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -11699,26 +11699,26 @@ int fint(void) { return ftemplate(); } // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK19-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK19-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK19-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK19-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK19-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -11806,23 +11806,23 @@ int fint(void) { return ftemplate(); } // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// 
CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -11912,41 +11912,41 @@ int fint(void) { return ftemplate(); } // CHECK20-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group 
!10 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK20-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK20-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK20-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK20-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK20-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, 
!llvm.access.group !10 // CHECK20-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK20-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK20-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -12058,7 +12058,7 @@ int fint(void) { return ftemplate(); } // CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -12143,41 +12143,41 @@ int fint(void) { return ftemplate(); } // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// 
CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK20-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK20-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// 
CHECK20-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK20-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK20-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK20-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK20-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 // CHECK20-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -12280,26 +12280,26 @@ int fint(void) { return ftemplate(); } // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK20-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK20-NEXT: [[TMP16:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK20-NEXT: [[CONV9:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK20-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK20-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1 +// CHECK20-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, 
!llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK20-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -12387,23 +12387,23 @@ int fint(void) { return ftemplate(); } // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// 
CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -12493,45 +12493,45 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK21-NEXT: store i32 
[[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK21-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK21-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK21-NEXT: 
[[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK21-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK21-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -12618,7 +12618,7 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK21-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, 
!nontemporal !12 +// CHECK21-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !16 // CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] @@ -12635,7 +12635,7 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] // CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 // CHECK21-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK21-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !12 +// CHECK21-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !16 // CHECK21-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK21-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 // CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] @@ -12647,7 +12647,7 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK21-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -12732,45 +12732,45 @@ int fint(void) { return ftemplate(); } // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK21-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] -// CHECK21-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK21-NEXT: [[ARRAYIDX4:%.*]] 
= getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK21-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK21-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK21-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK21-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK21-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !15 -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !19 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK21-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !19 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: 
omp.inner.for.inc: -// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK21-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK21-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK21-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: @@ -12877,26 +12877,26 @@ int fint(void) { return ftemplate(); } // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK21-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK21-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK21-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK21-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK21-NEXT: [[CONV11:%.*]] = trunc i32 
[[ADD10]] to i8 -// CHECK21-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !12 +// CHECK21-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK21-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: @@ -12921,7 +12921,7 @@ int fint(void) { return ftemplate(); } // CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK21-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK21: omp.inner.for.end23: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -13011,23 +13011,23 @@ int fint(void) { return ftemplate(); } // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// 
CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: @@ -13117,45 +13117,45 @@ int fint(void) { return ftemplate(); } // CHECK22-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK22-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8 -// CHECK22-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK22-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK22-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8 -// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK22-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP17]] to i64 // CHECK22-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM2]] -// CHECK22-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4 
+// CHECK22-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[MUL4:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK22-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK22-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP20]] to i64 // CHECK22-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM5]] -// CHECK22-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4 +// CHECK22-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP21]] -// CHECK22-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8 -// CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK22-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !9 +// CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP23]] to i64 // CHECK22-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM8]] -// CHECK22-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4 +// CHECK22-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !9 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK22-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK22-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// 
CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -13242,7 +13242,7 @@ int fint(void) { return ftemplate(); } // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK22-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK22-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !12 +// CHECK22-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !nontemporal !16 // CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK22-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]] @@ -13259,7 +13259,7 @@ int fint(void) { return ftemplate(); } // CHECK22-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]] // CHECK22-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4 // CHECK22-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]] -// CHECK22-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !12 +// CHECK22-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !nontemporal !16 // CHECK22-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK22-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64 // CHECK22-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]] @@ -13271,7 +13271,7 @@ int fint(void) { return ftemplate(); } // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK22-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -13356,45 +13356,45 @@ int fint(void) { return ftemplate(); } // CHECK22: omp.dispatch.body: // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK22-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]] 
-// CHECK22-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64 // CHECK22-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]] -// CHECK22-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK22-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64 // CHECK22-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]] -// CHECK22-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]] -// CHECK22-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !15 -// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: 
[[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !19 +// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64 // CHECK22-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]] -// CHECK22-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !19 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK22-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK22-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1 -// CHECK22-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK22: omp.dispatch.inc: @@ -13501,26 +13501,26 @@ int fint(void) { return ftemplate(); } // CHECK22: omp_if.then: // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK22-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK22-NEXT: br i1 [[CMP8]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK22-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !22 // CHECK22-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK22-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK22-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK22-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !12 +// CHECK22-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !16, !llvm.access.group !22 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK22-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK22-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_IF_END:%.*]] // CHECK22: omp_if.else: @@ -13545,7 +13545,7 @@ int fint(void) { return ftemplate(); } // CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK22-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop 
[[LOOP19:![0-9]+]] +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK22: omp.inner.for.end23: // CHECK22-NEXT: br label [[OMP_IF_END]] // CHECK22: omp_if.end: @@ -13635,23 +13635,23 @@ int fint(void) { return ftemplate(); } // CHECK22: omp.dispatch.body: // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK22: omp.dispatch.inc: @@ -13741,41 +13741,41 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK23-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = 
load float*, float** [[TMP2]], align 4 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] -// CHECK23-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK23-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK23-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -13862,7 +13862,7 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK23-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !13 +// CHECK23-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !17 // CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] // CHECK23-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 @@ -13876,7 +13876,7 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] // CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 // CHECK23-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK23-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !13 +// CHECK23-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !17 // CHECK23-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // 
CHECK23-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] // CHECK23-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 @@ -13887,7 +13887,7 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK23-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -13972,41 +13972,41 @@ int fint(void) { return ftemplate(); } // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK23-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, 
!llvm.access.group !16 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK23-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK23-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK23-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK23-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !20 // 
CHECK23-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK23-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK23-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK23-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK23-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: @@ -14113,26 +14113,26 @@ int fint(void) { return ftemplate(); } // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 
+// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK23-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK23-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK23-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !23 // CHECK23-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK23-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK23-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK23-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !13 +// CHECK23-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !17, !llvm.access.group !23 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK23-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: @@ -14157,7 +14157,7 @@ int fint(void) { return ftemplate(); } // CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4 // CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK23-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK23: omp.inner.for.end23: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -14247,23 +14247,23 @@ int fint(void) { return ftemplate(); } // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK23-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: @@ -14353,41 +14353,41 @@ int fint(void) { return ftemplate(); } // CHECK24-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 7 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4 -// CHECK24-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP14:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK24-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i32 [[TMP14]] -// CHECK24-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4 -// CHECK24-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4 -// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4 +// CHECK24-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP16]], i32 [[TMP17]] -// CHECK24-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4 +// CHECK24-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[MUL3:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK24-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4 -// CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4 +// CHECK24-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP19]], i32 [[TMP20]] -// CHECK24-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4 +// CHECK24-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP21]] -// CHECK24-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4 -// CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4 +// CHECK24-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP22]], i32 [[TMP23]] 
-// CHECK24-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 +// CHECK24-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !10 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK24-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -14474,7 +14474,7 @@ int fint(void) { return ftemplate(); } // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7 // CHECK24-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]] // CHECK24-NEXT: store i32 [[SUB]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !13 +// CHECK24-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 4, !nontemporal !17 // CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i32 [[TMP13]] // CHECK24-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4 @@ -14488,7 +14488,7 @@ int fint(void) { return ftemplate(); } // CHECK24-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP18]], i32 [[TMP19]] // CHECK24-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX4]], align 4 // CHECK24-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP20]] -// CHECK24-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], 
align 4, !nontemporal !13 +// CHECK24-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 4, !nontemporal !17 // CHECK24-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4 // CHECK24-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP21]], i32 [[TMP22]] // CHECK24-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4 @@ -14499,7 +14499,7 @@ int fint(void) { return ftemplate(); } // CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK24-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -14584,41 +14584,41 @@ int fint(void) { return ftemplate(); } // CHECK24: omp.dispatch.body: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]] // CHECK24-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127 // CHECK24-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !16 -// CHECK24-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i32 [[TMP15]] -// CHECK24-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP17]], i32 [[TMP18]] -// CHECK24-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[MUL4:%.*]] = fmul float [[TMP16]], [[TMP19]] -// CHECK24-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds float, float* [[TMP20]], i32 [[TMP21]] -// CHECK24-NEXT: 
[[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX5]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[MUL6:%.*]] = fmul float [[MUL4]], [[TMP22]] -// CHECK24-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP23]], i32 [[TMP24]] -// CHECK24-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: store float [[MUL6]], float* [[ARRAYIDX7]], align 4, !llvm.access.group !20 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK24-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK24-NEXT: [[ADD8:%.*]] = add i32 [[TMP25]], 1 -// CHECK24-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK24: omp.dispatch.inc: @@ -14725,26 +14725,26 @@ int fint(void) { return ftemplate(); } // CHECK24: omp_if.then: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK24-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK24-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1 +// CHECK24-NEXT: [[TMP17:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !23 // CHECK24-NEXT: [[CONV9:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK24-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], [[MUL]] // CHECK24-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i8 -// CHECK24-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !13 +// CHECK24-NEXT: store i8 [[CONV11]], i8* [[I6]], align 1, !nontemporal !17, !llvm.access.group !23 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK24-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK24-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK24: omp.inner.for.end: // 
CHECK24-NEXT: br label [[OMP_IF_END:%.*]] // CHECK24: omp_if.else: @@ -14769,7 +14769,7 @@ int fint(void) { return ftemplate(); } // CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: [[ADD22:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK24-NEXT: store i32 [[ADD22]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK24: omp.inner.for.end23: // CHECK24-NEXT: br label [[OMP_IF_END]] // CHECK24: omp_if.end: @@ -14859,23 +14859,23 @@ int fint(void) { return ftemplate(); } // CHECK24: omp.dispatch.body: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK24-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK24-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK24: omp.dispatch.inc: diff --git a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp index 1fdb674844f14..d778cab63507b 100644 --- a/clang/test/OpenMP/for_reduction_codegen_UDR.cpp +++ b/clang/test/OpenMP/for_reduction_codegen_UDR.cpp @@ -514,24 +514,24 @@ int main() { // CHECK1-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[TMP0]]) // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !2 +// 
CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: store i32 10, i32* [[I]], align 4 // CHECK1-NEXT: call void @.omp_combiner.(%struct.S* [[TMP0]], %struct.S* [[S1]]) @@ -3585,24 +3585,24 @@ int main() { // CHECK2-NEXT: call void @.omp_initializer.(%struct.S* [[S1]], %struct.S* [[TMP0]]) // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP1]], 10 // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP2]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !2 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP3]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: store i32 10, i32* [[I]], align 4 // CHECK2-NEXT: call void @.omp_combiner.(%struct.S* [[TMP0]], %struct.S* [[S1]]) diff --git a/clang/test/OpenMP/for_reduction_task_codegen.cpp b/clang/test/OpenMP/for_reduction_task_codegen.cpp index ac7c2bfe208b1..9084fd57b605b 100644 --- a/clang/test/OpenMP/for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/for_reduction_task_codegen.cpp @@ -457,26 +457,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -510,8 +510,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -985,26 +985,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1038,8 +1038,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1091,4 +1091,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/for_simd_codegen.cpp b/clang/test/OpenMP/for_simd_codegen.cpp index 2d1b0f073e2e7..f4b2bc93b208b 100644 --- a/clang/test/OpenMP/for_simd_codegen.cpp +++ b/clang/test/OpenMP/for_simd_codegen.cpp @@ -793,7 +793,7 @@ void parallel_simd(float *a) { #pragma omp for simd // TERM_DEBUG-NOT: __kmpc_global_thread_num // TERM_DEBUG: invoke i32 {{.*}}bar{{.*}}() - // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]], + // TERM_DEBUG: unwind label %[[TERM_LPAD:[a-zA-Z0-9\.]+]], // TERM_DEBUG-NOT: __kmpc_global_thread_num // TERM_DEBUG: [[TERM_LPAD]] // TERM_DEBUG: call void @__clang_call_terminate diff --git a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp index ae8c675644ad6..c6198994c9d26 100644 --- a/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_in_reduction_codegen.cpp @@ -601,35 +601,35 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** 
[[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -644,19 +644,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* 
[[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] @@ -665,9 +665,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] // CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: .omp_outlined..9.exit: // CHECK1-NEXT: ret i32 0 @@ -1278,35 +1278,35 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], 
%struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call 
void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// 
CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK2-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK2-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -1321,19 +1321,19 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store 
i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] @@ -1342,9 +1342,9 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] // CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 
[[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: .omp_outlined..9.exit: // CHECK2-NEXT: ret i32 0 @@ -1393,4 +1393,3 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp index cb5cbcd74434d..e6b68b4766f4e 100644 --- a/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/master_taskloop_simd_in_reduction_codegen.cpp @@ -601,35 +601,35 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP29:%.*]] = 
load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -644,31 +644,31 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP47:%.*]] = 
load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !15 // CHECK1-NEXT: [[CONV5_I:%.*]] = sext i16 [[TMP50]] to i32 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] -// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 
[[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: .omp_outlined..9.exit: // CHECK1-NEXT: ret i32 0 // @@ -1278,35 +1278,35 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK2-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP29:%.*]] = 
load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK2-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -1321,31 +1321,31 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP47:%.*]] 
= load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] -// CHECK2-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !15 // CHECK2-NEXT: [[CONV5_I:%.*]] = sext i16 [[TMP50]] to i32 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] -// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw 
i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: .omp_outlined..9.exit: // CHECK2-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/nvptx_data_sharing.cpp b/clang/test/OpenMP/nvptx_data_sharing.cpp index d63d7db7f5f24..83c22edee4644 100644 --- a/clang/test/OpenMP/nvptx_data_sharing.cpp +++ b/clang/test/OpenMP/nvptx_data_sharing.cpp @@ -3,8 +3,7 @@ ///==========================================================================/// // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK // expected-no-diagnostics @@ -78,8 +77,6 @@ void test_ds(){ // CHECK1-NEXT: br label [[DOTAWAIT_WORK]] // CHECK1: .exit: // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l15 // CHECK1-SAME: () 
#[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: @@ -145,8 +142,6 @@ void test_ds(){ // CHECK1-NEXT: br label [[DOTEXIT]] // CHECK1: .exit: // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: @@ -159,8 +154,6 @@ void test_ds(){ // CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK1-NEXT: store i32 1000, i32* [[TMP0]], align 4 // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: @@ -178,8 +171,6 @@ void test_ds(){ // CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: @@ -200,8 +191,6 @@ void test_ds(){ // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 // CHECK1-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: @@ -222,8 +211,6 @@ void test_ds(){ // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK1-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l15_worker // CHECK2-SAME: () 
#[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: @@ -271,8 +258,6 @@ void test_ds(){ // CHECK2-NEXT: br label [[DOTAWAIT_WORK]] // CHECK2: .exit: // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l15 // CHECK2-SAME: () #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: @@ -333,8 +318,6 @@ void test_ds(){ // CHECK2-NEXT: br label [[DOTEXIT]] // CHECK2: .exit: // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: @@ -347,8 +330,6 @@ void test_ds(){ // CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 // CHECK2-NEXT: store i32 1000, i32* [[TMP0]], align 4 // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: @@ -366,8 +347,6 @@ void test_ds(){ // CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: @@ -388,8 +367,6 @@ void test_ds(){ // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 // CHECK2-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: @@ -410,4 +387,190 @@ void test_ds(){ // CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 
// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK2-NEXT: ret void +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_worker +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK: .await.work: +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK: .select.workers: +// CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK: .execute.parallel: +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) +// CHECK-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK: .execute.fn: +// CHECK-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK: .check.next: 
+// CHECK-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] +// CHECK: .execute.fn2: +// CHECK-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK: .check.next3: +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK-NEXT: call void [[TMP7]](i16 0, i32 [[TMP4]]) +// CHECK-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK: .terminate.parallel: +// CHECK-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK: .barrier.parallel: +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK: .exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14 +// CHECK-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [2 x i8*], align 8 +// CHECK-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK: .worker: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z7test_dsv_l14_worker() #[[ATTR3]] +// CHECK-NEXT: br label 
[[DOTEXIT:%.*]] +// CHECK: .mastercheck: +// CHECK-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK: .master: +// CHECK-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK-NEXT: [[A:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A]] to i32* +// CHECK-NEXT: [[B:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[B_ON_STACK:%.*]] = bitcast i8* [[B]] to i32* +// CHECK-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK-NEXT: store i32 10, i32* [[A_ON_STACK]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* +// CHECK-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, 
i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP8]], i64 1) +// CHECK-NEXT: store i32 100, i32* [[B_ON_STACK]], align 4 +// CHECK-NEXT: store i32 1000, i32* [[C]], align 4 +// CHECK-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS7]], i64 0, i64 0 +// CHECK-NEXT: [[TMP10:%.*]] = bitcast i32* [[B_ON_STACK]] to i8* +// CHECK-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 +// CHECK-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS7]], i64 0, i64 1 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* +// CHECK-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP13]], i64 2) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[B]]) +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[A]]) +// CHECK-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK: .termination.notifier: +// CHECK-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: br label [[DOTEXIT]] +// CHECK: .exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], 
i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK-NEXT: store i32 1000, i32* [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[C:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[C1:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[B]], i32** [[B_ADDR]], align 8 +// CHECK-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load i32*, i32** [[B_ADDR]], align 8 +// CHECK-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK-NEXT: store i32* [[C]], i32** [[C1]], align 8 +// CHECK-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP2]], 10000 +// CHECK-NEXT: store i32 [[ADD]], i32* [[TMP0]], align 4 +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** +// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] +// CHECK-NEXT: ret void // diff --git 
a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp index 78d4f9d753f1e..0314d2e0524c5 100644 --- a/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp +++ b/clang/test/OpenMP/nvptx_distribute_parallel_generic_mode_codegen.cpp @@ -1,22 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown 
-fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK6 - // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK7 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK8 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK4 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown 
-fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK9 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK10 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK11 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK12 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK5 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK6 // expected-no-diagnostics #ifndef HEADER @@ -75,8 +63,6 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[DOTEXIT:%.*]] // CHECK1: .exit: // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: @@ -247,8 +233,6 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: @@ -376,8 +360,6 @@ int main(int argc, char **argv) { // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK2-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: @@ -417,8 +399,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: br label [[DOTEXIT:%.*]] // CHECK2: .exit: // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], 
[10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: @@ -584,8 +564,6 @@ int main(int argc, char **argv) { // CHECK2: omp.precond.end: // CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: @@ -713,8 +691,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK3-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: @@ -752,8 +728,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: br label [[DOTEXIT:%.*]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -921,8 +895,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: 
[[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1044,991 +1016,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 -// CHECK4-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A]], i32** 
[[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] 
= load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK4-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP5]], i16 [[TMP4]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds i8, i8* [[TMP6]], i32 0 -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP8]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP10]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP13]], i8* align 4 [[TMP14]], i32 40, i1 false) -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[TMP15]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP16]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP19]], [[COND_TRUE]] ], [ [[TMP20]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// 
CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP22]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP24]] to i8* -// CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP25]] to i8* -// CHECK4-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP31:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK4-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK4-NEXT: [[TMP33:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK4-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK4-NEXT: [[TMP35:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK4-NEXT: store i8* [[TMP35]], i8** [[TMP34]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP37:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK4-NEXT: store i8* [[TMP37]], i8** [[TMP36]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK4-NEXT: 
[[TMP39:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK4-NEXT: store i8* [[TMP39]], i8** [[TMP38]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP40]], 0 -// CHECK4-NEXT: [[TMP41:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK4-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP43]], i32 [[TMP41]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP44]], i32 7) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP45]], [[TMP46]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP47]], [[TMP48]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP49]], [[TMP50]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP51]], [[TMP52]] -// CHECK4-NEXT: br i1 [[CMP11]], 
label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK4: cond.true12: -// CHECK4-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: br label [[COND_END14:%.*]] -// CHECK4: cond.false13: -// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END14]] -// CHECK4: cond.end14: -// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP53]], [[COND_TRUE12]] ], [ [[TMP54]], [[COND_FALSE13]] ] -// CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP55]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP56:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP57:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP57]]) -// CHECK4-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP59:%.*]] = icmp ne i32 [[TMP58]], 0 -// CHECK4-NEXT: br i1 [[TMP59]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP60:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK4-NEXT: [[TMP61:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP60]], i8* align 4 [[TMP61]], i32 40, i1 false) -// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) -// CHECK4-NEXT: ret void -// -// 
-// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], 
i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK4-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK4-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] -// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] -// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK4-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK4-NEXT: br i1 [[TMP27]], label 
[[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) -// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 -// CHECK5-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** 
[[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK5: .execute: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] -// CHECK5-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK5: .omp.deinit: -// CHECK5-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void -// -// -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: 
[[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK5-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK5: omp.precond.then: -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: 
[[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// 
CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK5-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK5-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK5-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK5-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK5-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK5: cond.true12: -// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK5-NEXT: br label [[COND_END14:%.*]] -// CHECK5: cond.false13: -// CHECK5-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END14]] -// CHECK5: cond.end14: -// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK5-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK5-NEXT: 
call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK5-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK5-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK5-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) -// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: br label [[OMP_PRECOND_END]] -// CHECK5: omp.precond.end: -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) -// CHECK5-NEXT: ret void -// -// -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK5: omp.precond.then: -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK5: omp.inner.for.cond: -// 
CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] -// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// 
CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK5: omp.loop.exit: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK5: .omp.lastprivate.then: -// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) -// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK5: .omp.lastprivate.done: -// CHECK5-NEXT: br label [[OMP_PRECOND_END]] -// CHECK5: omp.precond.end: -// CHECK5-NEXT: ret void -// -// -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 -// CHECK6-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca 
i32*, align 4 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK6: .execute: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] -// CHECK6-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK6: .omp.deinit: -// CHECK6-NEXT: call void 
@__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void -// -// -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK6-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x 
i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK6-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK6-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK6-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK6: omp.precond.then: -// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = 
bitcast [10 x i32]* [[B4]] to i8* -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK6: cond.true: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: br label [[COND_END:%.*]] -// CHECK6: cond.false: -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: br label [[COND_END]] -// CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] -// CHECK6-NEXT: 
br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK6-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK6-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK6-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK6-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK6-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK6-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK6-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK6-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK6-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK6-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK6-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK6-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK6-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK6-NEXT: 
[[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK6-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK6-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK6-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) -// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK6-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK6: cond.true12: -// CHECK6-NEXT: [[TMP50:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_2]], align 4 -// CHECK6-NEXT: br label [[COND_END14:%.*]] -// CHECK6: cond.false13: -// CHECK6-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: br label [[COND_END14]] -// CHECK6: cond.end14: -// CHECK6-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK6-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK6: omp.inner.for.end: -// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK6: omp.loop.exit: -// CHECK6-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK6-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK6-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK6: .omp.lastprivate.then: -// CHECK6-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK6-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) -// CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK6: .omp.lastprivate.done: -// CHECK6-NEXT: br label [[OMP_PRECOND_END]] -// CHECK6: omp.precond.end: -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) -// CHECK6-NEXT: ret void -// -// -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 
dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK6-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK6-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: store i32* 
[[A]], i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK6-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK6-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] -// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK6: omp.precond.then: -// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK6-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] -// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] -// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], 
[[CALL9]] -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] -// CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] -// CHECK6-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK6-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 -// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK6: omp.body.continue: -// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK6-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK6: omp.inner.for.end: -// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK6: omp.loop.exit: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK6: .omp.lastprivate.then: -// CHECK6-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK6-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// 
CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) -// CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK6: .omp.lastprivate.done: -// CHECK6-NEXT: br label [[OMP_PRECOND_END]] -// CHECK6: omp.precond.end: -// CHECK6-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK7-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: @@ -2068,8 +1055,6 @@ int main(int argc, char **argv) { // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -2240,8 +1225,6 @@ int main(int argc, char **argv) { // CHECK7-NEXT: [[TMP64:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP64]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -2369,8 +1352,6 @@ int main(int argc, char **argv) { // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK8-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK8-NEXT: entry: @@ -2410,8 +1391,6 @@ int main(int argc, char **argv) { // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -2577,8 +1556,6 @@ int main(int argc, char **argv) { // CHECK8: omp.precond.end: // CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -2706,8 +1683,6 @@ int main(int argc, char **argv) { // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // 
CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK9-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: @@ -2745,8 +1720,6 @@ int main(int argc, char **argv) { // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -2914,8 +1887,6 @@ int main(int argc, char **argv) { // CHECK9-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK9-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -3037,8 +2008,6 @@ int main(int argc, char **argv) { // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK10-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // CHECK10-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK10-NEXT: entry: @@ -3076,8 +2045,6 @@ int main(int argc, char **argv) { // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -3245,8 +2212,6 @@ int main(int argc, char **argv) { // CHECK10-NEXT: [[TMP62:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK10-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP62]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -3368,656 +2333,1628 @@ int main(int argc, char **argv) { // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 // 
CHECK11-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: // CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK11: .execute: +// CHECK11-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] +// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK11: .omp.deinit: +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK11-NEXT: br label [[DOTEXIT:%.*]] +// CHECK11: .exit: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, 
align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK11-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* +// CHECK11-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 
4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] 
= phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK11-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] +// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* +// CHECK11-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK11-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* +// CHECK11-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK11-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK11-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK11-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK11-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK11-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// CHECK11: cond.true12: +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: br label [[COND_END14:%.*]] +// CHECK11: cond.false13: +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END14]] +// CHECK11: cond.end14: +// CHECK11-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK11-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) +// CHECK11-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 +// CHECK11-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11: .omp.lastprivate.then: +// CHECK11-NEXT: 
[[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK11-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK11: .omp.lastprivate.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 // CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 +// CHECK11-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 +// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 // CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK11: .execute: -// CHECK11-NEXT: 
[[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK11-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] -// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK11: .omp.deinit: -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK11-NEXT: br label [[DOTEXIT:%.*]] -// CHECK11: .exit: +// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], 
align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 
[[ADD]], i32* [[I5]], align 4 +// CHECK11-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] +// CHECK11-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK11-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK11-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] +// CHECK11-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11: .omp.lastprivate.then: +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK11: .omp.lastprivate.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: // CHECK11-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 +// CHECK12-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[C]], 
[10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK12: .execute: +// CHECK12-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] +// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK12: .omp.deinit: +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK12-NEXT: br label [[DOTEXIT:%.*]] +// CHECK12: .exit: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK12-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 
4 +// CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) +// CHECK12-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* +// CHECK12-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK12-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, 
i32* [[TMP12]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 +// CHECK12-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] +// CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: 
[[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* +// CHECK12-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK12-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* +// CHECK12-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK12-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK12-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK12-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK12-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 +// CHECK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK12-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK12-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 +// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK12-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK12-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK12-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK12-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* 
[[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK12-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// CHECK12: cond.true12: +// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: br label [[COND_END14:%.*]] +// CHECK12: cond.false13: +// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END14]] +// CHECK12: cond.end14: +// CHECK12-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] 
+// CHECK12-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) +// CHECK12-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 +// CHECK12-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12: .omp.lastprivate.then: +// CHECK12-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK12-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) +// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK12: .omp.lastprivate.done: +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 +// CHECK12-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 +// CHECK12-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] 
= load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK12-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], 
i32 40, i1 false) +// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK12-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] +// CHECK12-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK12-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK12-NEXT: [[CALL12:%.*]] = call i32 
@_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK12-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK12-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] +// CHECK12-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK12-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12: .omp.lastprivate.then: +// CHECK12-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK12-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK12: .omp.lastprivate.done: 
+// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19 +// CHECK4-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[ARGC_CASTED:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// 
CHECK4: .execute: +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[ARGC_CASTED]] to i32* +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[CONV1]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[ARGC_CASTED]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i64 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca 
i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) -// CHECK11-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK11-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 
1 -// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: 
cond.false: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] -// CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK11-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK11-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK11-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK11-NEXT: 
[[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK11-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK11-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK11-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK11-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK11-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK11-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK11-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK11-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK11-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 
-// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK11-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK11: cond.true12: -// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: br label [[COND_END14:%.*]] -// CHECK11: cond.false13: -// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END14]] -// CHECK11: cond.end14: -// CHECK11-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK11-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK11-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP56:%.*]] = icmp ne i32 
[[TMP55]], 0 -// CHECK11-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK11-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) -// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) -// CHECK11-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i64 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 8 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 40) +// CHECK4-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i64 40, i1 false) +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP22]] to i8* +// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[CONV]] to i8* +// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK4-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK4-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK4-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// 
CHECK4-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 +// CHECK4-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 5 +// CHECK4-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK4-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 6 +// CHECK4-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK4-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK4-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 +// CHECK4-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i64 7) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: 
[[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] +// CHECK4-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// CHECK4: cond.true12: +// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: br label [[COND_END14:%.*]] +// CHECK4: cond.false13: +// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END14]] +// CHECK4: cond.end14: +// CHECK4-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] +// CHECK4-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) +// CHECK4-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 +// CHECK4-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4: .omp.lastprivate.then: +// CHECK4-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK4-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK4-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i64 40, i1 false) +// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK4: .omp.lastprivate.done: +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK4-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK11-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK11-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// 
CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK11-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// 
CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK11-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK11-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK11-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK11-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK11-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK11: omp.body.continue: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: 
omp.loop.exit: -// CHECK11-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK11-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK11-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) -// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[C_ADDR:%.*]] = 
alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[C5:%.*]] = alloca [10 x i32], align 4 +// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 +// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK4-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 8 +// CHECK4-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 8 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP10]] to i32 +// CHECK4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i64 40, i1 false) +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK4-NEXT: [[CMP8:%.*]] = icmp ule i64 [[CONV7]], [[TMP17]] +// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// CHECK4-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I6]]) #[[ATTR5:[0-9]+]] +// CHECK4-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[CALL]], [[CALL9]] +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B4]], i64 0, i64 [[IDXPROM]] +// CHECK4-NEXT: [[CALL11:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[ADD10]], [[CALL11]] +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP20]] to i64 +// CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C5]], i64 0, i64 [[IDXPROM13]] +// CHECK4-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD12]], [[CALL15]] +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK4-NEXT: [[IDXPROM17:%.*]] = sext i32 [[TMP21]] to i64 +// 
CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i64 0, i64 [[IDXPROM17]] +// CHECK4-NEXT: [[CALL19:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX18]]) #[[ATTR5]] +// CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[ADD16]], [[CALL19]] +// CHECK4-NEXT: store i32 [[ADD20]], i32* [[TMP1]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK4-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4: .omp.lastprivate.then: +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK4-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C5]] to i8* +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i64 40, i1 false) +// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK4: .omp.lastprivate.done: +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19 +// 
CHECK5-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK5: .execute: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 
+// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] +// CHECK5-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK5: .omp.deinit: +// CHECK5-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// +// +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: 
[[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 40) +// CHECK5-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK5-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK5-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK5: omp.precond.then: +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK5-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* +// CHECK5-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK5-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK5-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK5-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK5-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK5-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK5-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 
4 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK5-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK5-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK5-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK5-NEXT: [[TMP39:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i32 7) +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP47:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK5-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// CHECK5: cond.true12: +// CHECK5-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK5-NEXT: br label [[COND_END14:%.*]] +// CHECK5: cond.false13: +// CHECK5-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END14]] +// CHECK5: cond.end14: +// CHECK5-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] +// CHECK5-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) +// CHECK5-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK5-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5: .omp.lastprivate.then: +// CHECK5-NEXT: [[TMP55:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK5-NEXT: [[TMP56:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i32 40, i1 false) +// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK5: .omp.lastprivate.done: +// CHECK5-NEXT: br label [[OMP_PRECOND_END]] +// CHECK5: omp.precond.end: +// CHECK5-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// 
CHECK5-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l31 -// CHECK12-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[ARGC_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK12: .execute: -// CHECK12-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* 
@[[GLOB3:[0-9]+]]) -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK12-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] -// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK12: .omp.deinit: -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK12-NEXT: br label [[DOTEXIT:%.*]] -// CHECK12: .exit: -// CHECK12-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, 
align 4 +// CHECK5-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 +// CHECK5-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK5-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// 
CHECK5-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK5: omp.precond.then: +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// 
CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK5-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] +// CHECK5-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] +// CHECK5-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK5-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK5-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK5-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] +// CHECK5-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK5: omp.body.continue: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK5-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK5-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK5: .omp.lastprivate.then: +// CHECK5-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK5-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK5-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK5-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK5: .omp.lastprivate.done: +// CHECK5-NEXT: br label [[OMP_PRECOND_END]] +// CHECK5: omp.precond.end: +// CHECK5-NEXT: ret void +// +// +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l19 +// CHECK6-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: 
[[ARGC_CASTED:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK6: .execute: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP5]], i32* [[ARGC_CASTED]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_CASTED]], align 4 +// CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]], [10 x i32]* [[TMP1]], i32* [[TMP2]], i32 [[TMP6]], [10 x i32]* [[TMP3]]) #[[ATTR4:[0-9]+]] +// CHECK6-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK6: .omp.deinit: +// CHECK6-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void // // -// CHECK12-LABEL: define 
{{[^@]+}}@__omp_outlined__ -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 -// CHECK12-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[ARGC]], i32* 
[[ARGC_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 40, i16 1) -// CHECK12-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK12-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK12-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// 
CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP10]], i8* align 4 [[TMP11]], i32 40, i1 false) -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP13]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: br label [[COND_END:%.*]] -// CHECK12: cond.false: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP16]], [[COND_TRUE]] ], [ [[TMP17]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP19]], [[ADD]] -// CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] 
-// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK12-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK12-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP28:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* -// CHECK12-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK12-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP2]] to i8* -// CHECK12-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK12-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[B4]] to i8* -// CHECK12-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK12-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 -// CHECK12-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK12-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK12-NEXT: [[TMP35:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 -// CHECK12-NEXT: [[TMP36:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK12-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 4 -// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK12-NEXT: 
[[TOBOOL:%.*]] = icmp ne i32 [[TMP37]], 0 -// CHECK12-NEXT: [[TMP38:%.*]] = zext i1 [[TOBOOL]] to i32 -// CHECK12-NEXT: [[TMP39:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP40:%.*]] = load i32, i32* [[TMP39]], align 4 -// CHECK12-NEXT: [[TMP41:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP40]], i32 [[TMP38]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP41]], i32 7) -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP46]], [[TMP47]] -// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP48]], [[TMP49]] -// CHECK12-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] -// CHECK12: cond.true12: -// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// 
CHECK12-NEXT: br label [[COND_END14:%.*]] -// CHECK12: cond.false13: -// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END14]] -// CHECK12: cond.end14: -// CHECK12-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP50]], [[COND_TRUE12]] ], [ [[TMP51]], [[COND_FALSE13]] ] -// CHECK12-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP52]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP54]]) -// CHECK12-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP56:%.*]] = icmp ne i32 [[TMP55]], 0 -// CHECK12-NEXT: br i1 [[TMP56]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP57:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* -// CHECK12-NEXT: [[TMP58:%.*]] = bitcast [10 x i32]* [[C1]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP57]], i8* align 4 [[TMP58]], i32 40, i1 false) -// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK12: .omp.lastprivate.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP4]]) -// CHECK12-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[C:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i32 [[ARGC:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[B4:%.*]] = alloca [10 x i32], align 4 +// CHECK6-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [7 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load [10 x i32]*, 
[10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 40) +// CHECK6-NEXT: [[C_ON_STACK:%.*]] = bitcast i8* [[C1]] to [10 x i32]* +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP5]], 0 +// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK6-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK6-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK6: omp.precond.then: +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP8]], i8* align 4 [[TMP9]], i32 40, i1 false) +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], 
i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK6: cond.true: +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: br label [[COND_END:%.*]] +// CHECK6: cond.false: +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: br label [[COND_END]] +// CHECK6: cond.end: +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK6: omp.inner.for.cond: +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK6-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK6-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6: omp.inner.for.body: +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK6-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = 
getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK6-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK6-NEXT: [[TMP26:%.*]] = bitcast i32* [[ARGC_ADDR]] to i8* +// CHECK6-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK6-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP2]] to i8* +// CHECK6-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK6-NEXT: [[TMP29:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK6-NEXT: [[TMP30:%.*]] = bitcast [10 x i32]* [[B4]] to i8* +// CHECK6-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 5 +// CHECK6-NEXT: [[TMP32:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK6-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 +// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr inbounds [7 x i8*], [7 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 6 +// CHECK6-NEXT: [[TMP34:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 +// CHECK6-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP35]], 0 +// CHECK6-NEXT: [[TMP36:%.*]] = zext i1 [[TOBOOL]] to i32 +// CHECK6-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 +// CHECK6-NEXT: [[TMP39:%.*]] = bitcast [7 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], i32 [[TMP36]], i32 -1, i32 -1, i8* bitcast (void 
(i32*, i32*, i32, i32, i32*, i32*, [10 x i32]*, [10 x i32]*, [10 x i32]*)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i32 7) +// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK6: omp.inner.for.inc: +// CHECK6-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK6-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] +// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] +// CHECK6-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] +// CHECK6: cond.true12: +// CHECK6-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK6-NEXT: br label [[COND_END14:%.*]] +// CHECK6: cond.false13: +// CHECK6-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: br label [[COND_END14]] +// CHECK6: cond.end14: +// CHECK6-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE12]] ], [ [[TMP49]], [[COND_FALSE13]] ] +// CHECK6-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP50]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK6: omp.inner.for.end: +// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK6: omp.loop.exit: +// CHECK6-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) +// CHECK6-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 +// CHECK6-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK6: .omp.lastprivate.then: +// CHECK6-NEXT: [[TMP55:%.*]] = bitcast [10 x i32]* [[TMP1]] to i8* +// CHECK6-NEXT: [[TMP56:%.*]] = bitcast [10 x i32]* [[C_ON_STACK]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP55]], i8* align 4 [[TMP56]], i32 40, i1 false) +// CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK6: .omp.lastprivate.done: +// CHECK6-NEXT: br label [[OMP_PRECOND_END]] +// CHECK6: omp.precond.end: +// CHECK6-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK6-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = 
alloca i32, align 4 -// CHECK12-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 -// CHECK12-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 -// CHECK12-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK12-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load [10 
x i32]*, [10 x i32]** [[C_ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* -// CHECK12-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 -// CHECK12-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK12-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] -// CHECK12-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] -// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] -// CHECK12-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] -// CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK12-NEXT: 
[[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] -// CHECK12-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] -// CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] -// CHECK12-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 -// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK12: omp.body.continue: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 -// CHECK12-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* -// CHECK12-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* -// CHECK12-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) -// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK12: .omp.lastprivate.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK6-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[C:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[D:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[B3:%.*]] = alloca [10 x i32], align 4 +// CHECK6-NEXT: [[C4:%.*]] = alloca [10 x i32], align 4 +// CHECK6-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK6-NEXT: store i32 
[[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[C]], [10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: store [10 x i32]* [[D]], [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load [10 x i32]*, [10 x i32]** [[C_ADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load [10 x i32]*, [10 x i32]** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP6]], 0 +// CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK6-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK6-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP7]] +// CHECK6-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK6: omp.precond.then: +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_LB]], align 4 +// 
CHECK6-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [10 x i32]* [[B3]] to i8* +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [10 x i32]* [[TMP2]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP11]], i8* align 4 [[TMP12]], i32 40, i1 false) +// CHECK6-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP14]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK6: omp.inner.for.cond: +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK6-NEXT: [[CMP6:%.*]] = icmp ule i32 [[TMP16]], [[TMP17]] +// CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6: omp.inner.for.body: +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK6-NEXT: [[CALL:%.*]] = call i32 @_Z3fooPi(i32* [[I5]]) #[[ATTR5:[0-9]+]] +// CHECK6-NEXT: [[CALL7:%.*]] = call i32 @_Z3fooPi(i32* [[TMP1]]) #[[ATTR5]] +// CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[CALL]], [[CALL7]] +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[B3]], i32 
0, i32 [[TMP19]] +// CHECK6-NEXT: [[CALL9:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX]]) #[[ATTR5]] +// CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[ADD8]], [[CALL9]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX11:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[C4]], i32 0, i32 [[TMP20]] +// CHECK6-NEXT: [[CALL12:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX11]]) #[[ATTR5]] +// CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[ADD10]], [[CALL12]] +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP4]], i32 0, i32 [[TMP21]] +// CHECK6-NEXT: [[CALL15:%.*]] = call i32 @_Z3fooPi(i32* [[ARRAYIDX14]]) #[[ATTR5]] +// CHECK6-NEXT: [[ADD16:%.*]] = add nsw i32 [[ADD13]], [[CALL15]] +// CHECK6-NEXT: store i32 [[ADD16]], i32* [[TMP1]], align 4 +// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK6: omp.body.continue: +// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK6: omp.inner.for.inc: +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK6-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK6: omp.inner.for.end: +// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK6: omp.loop.exit: +// CHECK6-NEXT: [[TMP24:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]]) +// CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 +// CHECK6-NEXT: br i1 [[TMP27]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK6: 
.omp.lastprivate.then: +// CHECK6-NEXT: [[TMP28:%.*]] = bitcast [10 x i32]* [[TMP3]] to i8* +// CHECK6-NEXT: [[TMP29:%.*]] = bitcast [10 x i32]* [[C4]] to i8* +// CHECK6-NEXT: call void @llvm.memcpy.p0i8.p0i8.i32(i8* align 4 [[TMP28]], i8* align 4 [[TMP29]], i32 40, i1 false) +// CHECK6-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK6: .omp.lastprivate.done: +// CHECK6-NEXT: br label [[OMP_PRECOND_END]] +// CHECK6: omp.precond.end: +// CHECK6-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_lambda_capturing.cpp b/clang/test/OpenMP/nvptx_lambda_capturing.cpp index 95a2fffb37dc6..2248efc71b76f 100644 --- a/clang/test/OpenMP/nvptx_lambda_capturing.cpp +++ b/clang/test/OpenMP/nvptx_lambda_capturing.cpp @@ -830,7 +830,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP7:%.*]] = load %class.anon*, %class.anon** [[TMP]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast %class.anon* [[L7]] to i8* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %class.anon* [[TMP7]] to i8* @@ -877,7 +876,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store %class.anon* [[TMP1]], %class.anon** [[TMP]], align 8 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) @@ -1019,7 +1017,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // 
CHECK2-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP9:%.*]] = load %class.anon.0*, %class.anon.0** [[_TMP2]], align 8 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %class.anon.0* [[L9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %class.anon.0* [[TMP9]] to i8* @@ -1086,7 +1083,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store %class.anon.0* [[TMP4]], %class.anon.0** [[_TMP2]], align 8 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -1206,7 +1202,6 @@ int main(int argc, char **argv) { // CHECK2-NEXT: store %class.anon* [[TMP0]], %class.anon** [[TMP]], align 8 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -1340,7 +1335,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8 // 
CHECK3-NEXT: [[TMP10:%.*]] = bitcast %class.anon* [[L9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %class.anon* [[TMP9]] to i8* @@ -1407,7 +1401,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) @@ -1588,7 +1581,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast %class.anon.0* [[L7]] to i8* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %class.anon.0* [[TMP7]] to i8* @@ -1635,7 +1627,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -1697,7 +1688,6 @@ int main(int argc, char **argv) { // CHECK3-NEXT: store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -1831,7 +1821,6 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK4-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK4-NEXT: [[TMP9:%.*]] = load %class.anon*, %class.anon** [[_TMP2]], align 8 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %class.anon* [[L9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %class.anon* [[TMP9]] to i8* @@ -1898,7 +1887,6 @@ int main(int argc, char **argv) { // CHECK4-NEXT: store %class.anon* [[TMP4]], %class.anon** [[_TMP2]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) @@ -2079,7 +2067,6 @@ int main(int argc, char **argv) { // CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK4-NEXT: [[TMP7:%.*]] = load %class.anon.0*, %class.anon.0** [[TMP]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast %class.anon.0* 
[[L7]] to i8* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %class.anon.0* [[TMP7]] to i8* @@ -2126,7 +2113,6 @@ int main(int argc, char **argv) { // CHECK4-NEXT: store %class.anon.0* [[TMP1]], %class.anon.0** [[TMP]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) @@ -2188,7 +2174,6 @@ int main(int argc, char **argv) { // CHECK4-NEXT: store %class.anon.0* [[TMP0]], %class.anon.0** [[TMP]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) diff --git a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp index 7ecfeae1f8879..ccbd6c6e87d3f 100644 --- a/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_multi_target_parallel_codegen.cpp @@ -31,7 +31,6 @@ int main() { // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -128,7 +127,6 @@ int main() { // 
CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK1: .termination.notifier: @@ -171,7 +169,6 @@ int main() { // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -268,7 +265,6 @@ int main() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK2: .termination.notifier: @@ -311,7 +307,6 @@ int main() { // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -408,7 +403,6 @@ int 
main() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: call void @_Z3usev() #[[ATTR7]] // CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK3: .termination.notifier: diff --git a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp index 600d71a6ea48c..cbd957000bc98 100644 --- a/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_nested_parallel_codegen.cpp @@ -106,7 +106,6 @@ int main() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] // CHECK1-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) @@ -305,7 +304,6 @@ int main() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] // CHECK2-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) @@ -504,7 
+502,6 @@ int main() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: call void @_Z3usePi(i32* [[TMP0]]) #[[ATTR7:[0-9]+]] // CHECK3-NEXT: call void @__kmpc_push_num_threads(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 2) diff --git a/clang/test/OpenMP/nvptx_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_codegen.cpp index c795a2121f052..7931e23e3996b 100644 --- a/clang/test/OpenMP/nvptx_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_codegen.cpp @@ -2,11 +2,8 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -aux-triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -aux-triple i386-unknown-unknown 
-fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -disable-O0-optnone | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -aux-triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -aux-triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK5 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -aux-triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -disable-llvm-optzns -disable-O0-optnone | FileCheck %s --check-prefix=CHECK2 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -76,989 +73,6 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker -// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// 
CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .check.next: -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] -// CHECK1: .execute.fn2: -// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .check.next3: -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] -// CHECK1: .execute.fn5: -// CHECK1-NEXT: call 
void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .check.next6: -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 -// CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: 
[[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i64 0) -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i64 0) -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** -// CHECK1-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i64 0) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 42, i32* [[A]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** 
[[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 43, i32* [[A]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 44, i32* [[A]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker -// CHECK1-SAME: () #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: 
.select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .check.next: -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 -// CHECK1-SAME: (i64 [[N:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* 
[[N_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 -// CHECK1-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: 
[[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i64 0) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], 1 -// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i16 -// CHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV2]], align 8 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// 
CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32 45, i32* [[A]], align 4 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker -// CHECK1-SAME: () #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: 
store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .check.next: -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, 
i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 -// CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[A7]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[A7]] to i8* -// CHECK1-NEXT: store i8* [[TMP13]], i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP11]], 
i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP14]], i64 1) -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[A7]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP16]]) -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: br label 
[[OMP_CRITICAL_LOOP:%.*]] -// CHECK1: omp.critical.loop: -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] -// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK1: omp.critical.test: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] -// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] -// CHECK1: omp.critical.body: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] -// CHECK1: omp.critical.sync: -// CHECK1-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 -// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] -// CHECK1: omp.critical.exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, 
i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker -// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .check.next: -// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] -// CHECK2: .execute.fn2: -// CHECK2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .check.next3: -// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] -// CHECK2: .execute.fn5: -// CHECK2-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .check.next6: -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// 
CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 -// CHECK2-SAME: (i64 [[A:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label 
[[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i64 0) -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i64 0) -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i64 0) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// 
CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 42, i32* [[A]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], 
i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 43, i32* [[A]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 44, i32* [[A]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// 
CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker -// CHECK2-SAME: () #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label 
[[DOTCHECK_NEXT:%.*]] -// CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .check.next: -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 -// CHECK2-SAME: (i64 [[N:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 -// CHECK2-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [0 
x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i64 0) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], 1 -// CHECK2-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i16 -// CHECK2-NEXT: store i16 [[CONV11]], i16* [[CONV2]], align 8 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32 45, i32* 
[[A]], align 4 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker -// CHECK2-SAME: () #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 
[[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .check.next: -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 -// CHECK2-SAME: (i64 [[A:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[A7:%.*]] = 
getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[A7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[A7]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP11]], i64 1) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[A7]], align 4 -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] -// CHECK2: omp.critical.loop: -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] -// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK2: omp.critical.test: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] -// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] -// CHECK2: omp.critical.body: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] -// CHECK2: omp.critical.sync: -// CHECK2-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK2-NEXT: store i32 [[TMP9]], i32* 
[[CRITICAL_COUNTER]], align 4 -// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] -// CHECK2: omp.critical.exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker // CHECK3-SAME: () #[[ATTR0:[0-9]+]] { // CHECK3-NEXT: entry: @@ -1113,8 +127,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTAWAIT_WORK]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 // CHECK3-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1165,8 +177,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTEXIT]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: 
@@ -1177,8 +187,6 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 42, i32* [[A]], align 4 // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined___wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1192,8 +200,6 @@ int bar(int n){ // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1204,8 +210,6 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 43, i32* [[A]], align 4 // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1219,8 +223,6 @@ int bar(int n){ // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1231,8 +233,6 @@ int bar(int n){ // CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK3-NEXT: store i32 44, i32* [[A]], align 4 // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1246,8 +246,6 @@ int bar(int n){ // CHECK3-NEXT: call void 
@__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker // CHECK3-SAME: () #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1288,8 +286,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTAWAIT_WORK]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 // CHECK3-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1354,8 +350,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTEXIT]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1369,8 +363,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1384,8 +376,6 @@ int bar(int n){ // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker // CHECK3-SAME: () #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1426,8 +416,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTAWAIT_WORK]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 // CHECK3-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1486,8 +474,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[DOTEXIT]] // CHECK3: .exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1528,8 +514,6 @@ int bar(int n){ // CHECK3-NEXT: br label [[OMP_CRITICAL_LOOP]] // CHECK3: omp.critical.exit: // CHECK3-NEXT: ret void -// -// // CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: @@ -1547,8 +531,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 // CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR2]] // CHECK3-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker // CHECK4-SAME: () #[[ATTR0:[0-9]+]] { // CHECK4-NEXT: entry: @@ -1603,8 +585,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTAWAIT_WORK]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 // CHECK4-SAME: (i32 [[A:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: @@ -1655,8 +635,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -1667,8 +645,6 @@ int bar(int n){ // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK4-NEXT: store i32 42, i32* [[A]], align 4 // 
CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined___wrapper // CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -1682,21 +658,316 @@ int bar(int n){ // CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] // CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 43, i32* [[A]], align 4 +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// 
CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 44, i32* [[A]], align 4 +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker +// CHECK4-SAME: () #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK4: .await.work: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: [[TMP0:%.*]] = call i1 
@__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK4: .select.workers: +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK4: .execute.parallel: +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) +// CHECK4-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK4: .execute.fn: +// CHECK4-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK4: .check.next: +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK4-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK4: .terminate.parallel: +// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK4: .barrier.parallel: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 +// CHECK4-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK4-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK4-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK4: .worker: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .mastercheck: +// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK4-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] 
+// CHECK4-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK4-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK4: .master: +// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 +// CHECK4-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 +// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i32 0) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: [[CONV7:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK4-NEXT: store i16 [[CONV9]], i16* [[CONV]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX]], align 4 
+// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK4: .termination.notifier: +// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTEXIT]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 45, i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK4-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker +// CHECK4-SAME: () #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK4: .await.work: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK4: .select.workers: +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK4: .execute.parallel: +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) +// CHECK4-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK4: .execute.fn: +// CHECK4-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK4: .check.next: +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK4-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK4-NEXT: br label 
[[DOTTERMINATE_PARALLEL]] +// CHECK4: .terminate.parallel: +// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK4: .barrier.parallel: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK4-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK4: .worker: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .mastercheck: +// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 
[[NVPTX_TID1]], [[MASTER_TID]] +// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK4: .master: +// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK4-NEXT: store i32 [[TMP7]], i32* [[A7]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i32* [[A7]] to i8* +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP11]], i32 1) +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[A7]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[A7]], align 4 +// CHECK4-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK4: 
.termination.notifier: +// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTEXIT]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 // CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 43, i32* [[A]], align 4 +// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] +// CHECK4: omp.critical.loop: +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] +// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK4: omp.critical.test: +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] +// CHECK4-NEXT: br i1 [[TMP5]], label 
[[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK4: omp.critical.body: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK4-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK4-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK4-NEXT: br label [[OMP_CRITICAL_SYNC]] +// CHECK4: omp.critical.sync: +// CHECK4-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK4-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP]] +// CHECK4: omp.critical.exit: // CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper // CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 @@ -1707,814 +978,1427 @@ int bar(int n){ // CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 // CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 // CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 4 -// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 44, i32* [[A]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] // CHECK4-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: 
.execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK5: .execute.fn: +// CHECK5-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .check.next: +// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] +// CHECK5: .execute.fn2: +// CHECK5-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .check.next3: +// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] +// CHECK5: .execute.fn5: +// CHECK5-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .check.next6: +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) 
+// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 +// CHECK5-SAME: (i32 [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 4 +// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker() #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: 
+// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i32 0) +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i32 0) +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i32 0) +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] 
+// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32 42, i32* [[A]], align 4 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store 
i32 43, i32* [[A]], align 4 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32 44, i32* [[A]], align 4 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 
[[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: .execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK5: .execute.fn: +// CHECK5-NEXT: call void @__omp_outlined__3_wrapper(i16 0, 
i32 [[TMP4]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .check.next: +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 +// CHECK5-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP1:%.*]] = icmp ult i32 
[[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK5-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: +// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 +// CHECK5-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* 
@__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i32 0) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK5-NEXT: [[CONV7:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK5-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK5-NEXT: store i16 [[CONV9]], i16* [[CONV]], align 4 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32 45, i32* [[A]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// 
CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label 
[[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: .execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK5: .execute.fn: +// CHECK5-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .check.next: +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 +// CHECK5-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 
[[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: +// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[A7]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i32* [[A7]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP11]], i32 1) +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[A7]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[A7]], align 4 +// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] +// CHECK5: omp.critical.loop: +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] +// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK5: omp.critical.test: +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] +// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK5: omp.critical.body: +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK5-NEXT: br label [[OMP_CRITICAL_SYNC]] +// CHECK5: omp.critical.sync: +// CHECK5-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK5-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP]] +// CHECK5: omp.critical.exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// 
CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_worker +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = 
icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK1: .execute.fn: +// CHECK1-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .check.next: +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] +// CHECK1: .execute.fn2: +// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .check.next3: +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] +// CHECK1: .execute.fn5: +// CHECK1-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .check.next6: +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br 
label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26 +// CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 
[[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i64 0) +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* 
bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i64 0) +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i64 0) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker -// CHECK4-SAME: () #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK4: .await.work: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK4: 
.select.workers: -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK4: .execute.parallel: -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) -// CHECK4-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK4: .execute.fn: -// CHECK4-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK4: .check.next: -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK4-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK4: .terminate.parallel: -// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK4: .barrier.parallel: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 42, i32* [[A]], align 4 +// CHECK1-NEXT: ret void 
// // -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 -// CHECK4-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK4-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK4-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK4: .worker: -// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .mastercheck: -// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 
[[NVPTX_WARP_SIZE3]], 1 -// CHECK4-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK4-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK4-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK4: .master: -// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 -// CHECK4-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 -// CHECK4-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i32 0) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i16 [[TMP11]] to i32 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 -// CHECK4-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 -// CHECK4-NEXT: store i16 [[CONV9]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* 
[[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK4: .termination.notifier: -// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTEXIT]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32 45, i32* [[A]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// 
CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 43, i32* [[A]], align 4 +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK4-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store 
i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker -// CHECK4-SAME: () #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK4: .await.work: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK4: .select.workers: -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK4: .execute.parallel: -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) -// CHECK4-NEXT: br i1 [[WORK_MATCH]], label 
[[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK4: .execute.fn: -// CHECK4-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK4: .check.next: -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK4-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK4: .terminate.parallel: -// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK4: .barrier.parallel: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 44, i32* [[A]], align 4 +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 -// CHECK4-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() 
-// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK4: .worker: -// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .mastercheck: -// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK4: .master: -// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[A7]], 
align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i32* [[A7]] to i8* -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP11]], i32 1) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[A7]], align 4 -// CHECK4-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK4: .termination.notifier: -// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTEXIT]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* 
[[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_worker +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK1: .execute.fn: +// CHECK1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .check.next: +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* 
+// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK1-SAME: (i64 [[N:%.*]], i64 [[A:%.*]], i64 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[AA]], i64* [[AA_ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[AA_ADDR]] to i16* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br 
i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 +// CHECK1-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i64 0) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV1]], align 8 
+// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[CONV9]], 1 +// CHECK1-NEXT: [[CONV11:%.*]] = trunc i32 [[ADD10]] to i16 +// CHECK1-NEXT: store i16 [[CONV11]], i16* [[CONV2]], align 8 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// 
CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] -// CHECK4: omp.critical.loop: -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] -// CHECK4-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK4: omp.critical.test: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] -// CHECK4-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] -// CHECK4: omp.critical.body: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK4-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK4-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK4-NEXT: br label [[OMP_CRITICAL_SYNC]] -// CHECK4: omp.critical.sync: -// CHECK4-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 -// CHECK4-NEXT: br label [[OMP_CRITICAL_LOOP]] -// CHECK4: omp.critical.exit: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// 
CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32 45, i32* [[A]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper -// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] 
= alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* 
[[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK5: .execute.fn: -// CHECK5-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .check.next: -// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] -// CHECK5: .execute.fn2: -// CHECK5-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .check.next3: -// CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] -// CHECK5: .execute.fn5: -// CHECK5-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .check.next6: -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .terminate.parallel: -// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_worker +// CHECK1-SAME: () #[[ATTR0]] { +// 
CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK1: .execute.fn: +// CHECK1-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .check.next: +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// 
CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29 -// CHECK5-SAME: (i32 [[A:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 4 -// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: .worker: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l29_worker() #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// 
CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i32 0) -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i32 0) -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i32 0) -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// 
CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 +// CHECK1-SAME: (i64 [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label 
[[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[A7:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A7]] to i32* +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[A_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP9]], i64 1) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[A7]]) +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// 
CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 42, i32* [[A]], align 4 -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] +// CHECK1: omp.critical.loop: +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] +// CHECK1-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK1: 
omp.critical.test: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] +// CHECK1-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK1: omp.critical.body: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK1-NEXT: br label [[OMP_CRITICAL_SYNC]] +// CHECK1: omp.critical.sync: +// CHECK1-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK1-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK1-NEXT: br label [[OMP_CRITICAL_LOOP]] +// CHECK1: omp.critical.exit: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined___wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) 
#[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 43, i32* [[A]], align 4 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_worker +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: 
[[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK2: .execute.fn: +// CHECK2-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .check.next: +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH1:%.*]] = icmp eq i8* [[TMP6]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH1]], label [[DOTEXECUTE_FN2:%.*]], label [[DOTCHECK_NEXT3:%.*]] +// CHECK2: .execute.fn2: +// CHECK2-NEXT: call void 
@__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .check.next3: +// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH4:%.*]] = icmp eq i8* [[TMP7]], bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH4]], label [[DOTEXECUTE_FN5:%.*]], label [[DOTCHECK_NEXT6:%.*]] +// CHECK2: .execute.fn5: +// CHECK2-NEXT: call void @__omp_outlined__2_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .check.next6: +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP8]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26 +// CHECK2-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS7:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS8:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26_worker() #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: 
[[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP6]], i32 0) +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS7]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 0, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP7]], i32 0) +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS8]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** [[TMP8]], i32 0) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP9]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* 
noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 44, i32* [[A]], align 4 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 42, i32* [[A]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +// CHECK2-SAME: (i16 zeroext 
[[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 43, i32* [[A]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker -// CHECK5-SAME: () #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 
[[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK5: .execute.fn: -// CHECK5-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .check.next: -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .terminate.parallel: -// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46 -// CHECK5-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 -// CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 -// CHECK5-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* -// CHECK5-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: 
.worker: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l46_worker() #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK5-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 -// CHECK5-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i32 0) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// 
CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 4 -// CHECK5-NEXT: [[CONV7:%.*]] = sext i16 [[TMP11]] to i32 -// CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 -// CHECK5-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 -// CHECK5-NEXT: store i16 [[CONV9]], i16* [[CONV]], align 4 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX]], align 4 -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 44, i32* [[A]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A:%.*]] = 
alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32 45, i32* [[A]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2_wrapper +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: call void 
@__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_worker +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK2: .execute.fn: +// CHECK2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .check.next: +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast 
i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker -// CHECK5-SAME: () #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* 
[[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK5: .execute.fn: -// CHECK5-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .check.next: -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .terminate.parallel: -// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK2-SAME: (i32 [[N:%.*]], i32 [[A:%.*]], i32 [[AA:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [0 x i8*], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[AA]], i32* [[AA_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43_worker() #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 1000 +// CHECK2-NEXT: [[TMP8:%.*]] = zext i1 [[CMP]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: 
call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 [[TMP8]], i32 -1, i32 -1, i8* bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP9]], i32 0) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP11]] to i32 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[CONV7]], 1 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i32 [[ADD8]] to i16 +// CHECK2-NEXT: store i16 [[CONV9]], i16* [[CONV]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58 -// CHECK5-SAME: (i32 [[A:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: 
[[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: .worker: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l58_worker() #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[A7]], align 4 -// 
CHECK5-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i32* [[A7]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP11]], i32 1) -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[A7]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[A7]], align 4 -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 45, i32* [[A]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: 
call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP1]]) +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] -// CHECK5: omp.critical.loop: -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] -// CHECK5-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] -// CHECK5: omp.critical.test: -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] -// CHECK5: omp.critical.body: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// 
CHECK5-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") -// CHECK5-NEXT: br label [[OMP_CRITICAL_SYNC]] -// CHECK5: omp.critical.sync: -// CHECK5-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) -// CHECK5-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 -// CHECK5-NEXT: br label [[OMP_CRITICAL_LOOP]] -// CHECK5: omp.critical.exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: 
store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_worker +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK2: .execute.fn: +// CHECK2-NEXT: call void @__omp_outlined__4_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .check.next: +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55 +// CHECK2-SAME: (i32 [[A:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l55_worker() #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: [[A7:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[A_ON_STACK:%.*]] = bitcast i8* [[A7]] to i32* +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[A_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[A_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* 
bitcast (void (i32*, i32*, i32*)* @__omp_outlined__4 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__4_wrapper to i8*), i8** [[TMP9]], i32 1) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[A_ON_STACK]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[A_ON_STACK]], align 4 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[A7]]) +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[CRITICAL_COUNTER:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_warp_active_thread_mask() +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: store i32 0, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP:%.*]] +// CHECK2: omp.critical.loop: +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: 
[[TMP3:%.*]] = icmp slt i32 [[TMP2]], [[NVPTX_NUM_THREADS]] +// CHECK2-NEXT: br i1 [[TMP3]], label [[OMP_CRITICAL_TEST:%.*]], label [[OMP_CRITICAL_EXIT:%.*]] +// CHECK2: omp.critical.test: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID]], [[TMP4]] +// CHECK2-NEXT: br i1 [[TMP5]], label [[OMP_CRITICAL_BODY:%.*]], label [[OMP_CRITICAL_SYNC:%.*]] +// CHECK2: omp.critical.body: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], [8 x i32]* @"_gomp_critical_user_$var") +// CHECK2-NEXT: br label [[OMP_CRITICAL_SYNC]] +// CHECK2: omp.critical.sync: +// CHECK2-NEXT: call void @__kmpc_syncwarp(i32 [[TMP1]]) +// CHECK2-NEXT: [[TMP9:%.*]] = add nsw i32 [[TMP4]], 1 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[CRITICAL_COUNTER]], align 4 +// CHECK2-NEXT: br label [[OMP_CRITICAL_LOOP]] +// CHECK2: omp.critical.exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4_wrapper +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], 
align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp index 978967250658e..f42931ec87b10 100644 --- a/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_parallel_for_codegen.cpp @@ -1,8 +1,7 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -disable-llvm-optzns | FileCheck %s --check-prefix=CHECK // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -73,8 +72,6 @@ int bar(int n){ // CHECK1-NEXT: br label [[DOTAWAIT_WORK]] // CHECK1: .exit: // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l14 // CHECK1-SAME: (i64 [[N:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK1-NEXT: entry: @@ -141,8 +138,6 @@ int bar(int n){ // CHECK1-NEXT: br label [[DOTEXIT]] // CHECK1: .exit: // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK1-NEXT: entry: @@ -231,8 +226,6 @@ int bar(int n){ // CHECK1: omp.dispatch.end: // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK1-NEXT: ret void -// -// // CHECK1-LABEL: define 
{{[^@]+}}@__omp_outlined___wrapper // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: @@ -253,8 +246,6 @@ int bar(int n){ // CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK1-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l14_worker // CHECK2-SAME: () #[[ATTR0:[0-9]+]] { // CHECK2-NEXT: entry: @@ -295,8 +286,6 @@ int bar(int n){ // CHECK2-NEXT: br label [[DOTAWAIT_WORK]] // CHECK2: .exit: // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l14 // CHECK2-SAME: (i64 [[N:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK2-NEXT: entry: @@ -358,8 +347,6 @@ int bar(int n){ // CHECK2-NEXT: br label [[DOTEXIT]] // CHECK2: .exit: // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK2-NEXT: entry: @@ -448,8 +435,6 @@ int bar(int n){ // CHECK2: omp.dispatch.end: // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) // CHECK2-NEXT: ret void -// -// // CHECK2-LABEL: define {{[^@]+}}@__omp_outlined___wrapper // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: @@ -470,4 +455,217 @@ int bar(int n){ // CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 // CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] // CHECK2-NEXT: ret void +// CHECK-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_worker +// CHECK-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK: .await.work: +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK: .select.workers: +// CHECK-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK: .execute.parallel: +// CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*) +// CHECK-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK: .execute.fn: +// CHECK-NEXT: call void @__omp_outlined___wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK: .check.next: +// CHECK-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK-NEXT: br label 
[[DOTTERMINATE_PARALLEL]] +// CHECK: .terminate.parallel: +// CHECK-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK: .barrier.parallel: +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK: .exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13 +// CHECK-SAME: (i64 [[N:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK: .worker: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l13_worker() #[[ATTR3]] +// CHECK-NEXT: br label [[DOTEXIT:%.*]] +// CHECK: .mastercheck: +// CHECK-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK: .master: +// CHECK-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK-NEXT: [[D:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D]] to i32* +// CHECK-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK-NEXT: store i32 [[TMP7]], i32* [[D_ON_STACK]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK-NEXT: [[TMP9:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK-NEXT: [[TMP11:%.*]] = bitcast i32* [[D_ON_STACK]] to i8* +// CHECK-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 +// CHECK-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, [10 x i32]*, i32*)* @__omp_outlined__ to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined___wrapper to i8*), i8** [[TMP12]], i64 
2) +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 3 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK-NEXT: store i32 [[ADD]], i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: call void @__kmpc_free_shared(i8* [[D]]) +// CHECK-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK: .termination.notifier: +// CHECK-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK-NEXT: br label [[DOTEXIT]] +// CHECK: .exit: +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]], i32* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK-NEXT: [[D_ADDR:%.*]] = alloca i32*, align 8 +// CHECK-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK-NEXT: store i32* [[D]], i32** [[D_ADDR]], align 8 +// CHECK-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK-NEXT: 
[[TMP1:%.*]] = load i32*, i32** [[D_ADDR]], align 8 +// CHECK-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK: omp.dispatch.cond: +// CHECK-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK: cond.true: +// CHECK-NEXT: br label [[COND_END:%.*]] +// CHECK: cond.false: +// CHECK-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label [[COND_END]] +// CHECK: cond.end: +// CHECK-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK: omp.dispatch.body: +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK: omp.inner.for.cond: +// CHECK-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] +// CHECK-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK: omp.inner.for.body: +// CHECK-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 +// CHECK-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4 +// CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK-NEXT: [[TMP14:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], [[TMP12]] +// CHECK-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK: omp.body.continue: +// CHECK-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK: omp.inner.for.inc: +// CHECK-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK: omp.inner.for.end: +// CHECK-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK: omp.dispatch.inc: +// CHECK-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_LB]], align 4 +// CHECK-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_UB]], align 4 +// CHECK-NEXT: br label 
[[OMP_DISPATCH_COND]] +// CHECK: omp.dispatch.end: +// CHECK-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK-NEXT: ret void +// +// +// CHECK-LABEL: define {{[^@]+}}@__omp_outlined___wrapper +// CHECK-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK-NEXT: entry: +// CHECK-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to [10 x i32]** +// CHECK-NEXT: [[TMP5:%.*]] = load [10 x i32]*, [10 x i32]** [[TMP4]], align 8 +// CHECK-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 +// CHECK-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** +// CHECK-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8 +// CHECK-NEXT: call void @__omp_outlined__(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP5]], i32* [[TMP8]]) #[[ATTR3]] +// CHECK-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_codegen.cpp b/clang/test/OpenMP/nvptx_target_codegen.cpp index 81b36085443aa..c7a498682f4a9 100644 --- a/clang/test/OpenMP/nvptx_target_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_codegen.cpp @@ -155,7 +155,6 @@ void unreachable_call() { // CHECK1-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -257,7 +256,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK1: .termination.notifier: // CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -332,7 +330,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 8 // CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP5]] to i32 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 1 @@ -441,7 +438,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE9:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT10:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS8]], [[NVPTX_WARP_SIZE9]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT10]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 @@ -577,7 +573,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 @@ -681,7 +676,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[CONV9:%.*]] = sitofp i32 [[TMP9]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV9]], 1.500000e+00 @@ -711,56 +705,27 @@ void unreachable_call() { // // // CHECK1-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK1-SAME: (i32 [[F3:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK1-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F2:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[A_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) -// CHECK1-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -// CHECK1-NEXT: [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR2]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[TMP4]], label 
[[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]] -// CHECK1: .spmd: -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .non-spmd: -// CHECK1-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i64 4, i64 128 -// CHECK1-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i64 [[TMP5]], i16 0) -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to %struct._globalized_locals_ty.0* -// CHECK1-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[_SELECT_STACK]], i32 0, i32 0 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F]], i32 0, i32 [[NVPTX_LANE_ID]] -// CHECK1-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP8]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32* [[F1]], i32* [[TMP9]] -// CHECK1-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i32* [[F2]], i32* [[TMP10]] -// CHECK1-NEXT: store i32 [[F3]], i32* [[TMP11]], align 4 +// CHECK1-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* +// CHECK1-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK1-NEXT: store double* [[A]], double** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load double*, double** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP11]] to i8* -// 
CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP17]], i64 2) -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[RETVAL]], align 4 -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTEXIT5:%.*]], label [[DOTNON_SPMD4:%.*]] -// CHECK1: .non-spmd4: -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8* -// CHECK1-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP19]]) -// CHECK1-NEXT: br label [[DOTEXIT5]] -// CHECK1: .exit5: -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK1-NEXT: ret i32 [[TMP20]] +// CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[F_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP1]] to i8* +// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast 
(void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i64 2) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK1-NEXT: ret i32 [[TMP7]] // // // CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker @@ -825,7 +790,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: call void @_Z6asserti(i32 0) #[[ATTR8:[0-9]+]] // CHECK1-NEXT: unreachable // CHECK1: 5: @@ -909,7 +873,6 @@ void unreachable_call() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 @@ -987,7 +950,6 @@ void unreachable_call() { // CHECK2-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -1089,7 +1051,6 @@ void 
unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK2: .termination.notifier: // CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -1164,7 +1125,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP5]] to i32 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 1 @@ -1272,7 +1232,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE9:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT10:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS8]], [[NVPTX_WARP_SIZE9]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT10]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 @@ -1407,7 +1366,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 @@ -1510,7 +1468,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP9]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 @@ -1540,56 +1497,27 @@ void unreachable_call() { // // // CHECK2-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK2-SAME: (i32 [[F3:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK2-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F2:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) -// CHECK2-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -// CHECK2-NEXT: [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR2]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]] -// CHECK2: .spmd: -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .non-spmd: -// CHECK2-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i32 4, i32 128 -// CHECK2-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i32 [[TMP5]], i16 0) -// 
CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to %struct._globalized_locals_ty.0* -// CHECK2-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[_SELECT_STACK]], i32 0, i32 0 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F]], i32 0, i32 [[NVPTX_LANE_ID]] -// CHECK2-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP8]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32* [[F1]], i32* [[TMP9]] -// CHECK2-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i32* [[F2]], i32* [[TMP10]] -// CHECK2-NEXT: store i32 [[F3]], i32* [[TMP11]], align 4 +// CHECK2-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* +// CHECK2-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK2-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load double*, double** [[A_ADDR]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP11]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP12]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 
-// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP17]], i32 2) -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[RETVAL]], align 4 -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTEXIT5:%.*]], label [[DOTNON_SPMD4:%.*]] -// CHECK2: .non-spmd4: -// CHECK2-NEXT: [[TMP19:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8* -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP19]]) -// CHECK2-NEXT: br label [[DOTEXIT5]] -// CHECK2: .exit5: -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK2-NEXT: ret i32 [[TMP20]] +// CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[F_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP1]] to i8* +// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i32 2) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK2-NEXT: ret i32 
[[TMP7]] // // // CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker @@ -1654,7 +1582,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: call void @_Z6asserti(i32 0) #[[ATTR8:[0-9]+]] // CHECK2-NEXT: unreachable // CHECK2: 5: @@ -1737,7 +1664,6 @@ void unreachable_call() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK2-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 @@ -1815,7 +1741,6 @@ void unreachable_call() { // CHECK3-NEXT: [[TMP0:%.*]] = load i32**, i32*** [[PTR2_ADDR]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -1917,7 +1842,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// 
CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK3: .termination.notifier: // CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -1992,7 +1916,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* [[CONV]], align 4 // CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP5]] to i32 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[CONV7]], 1 @@ -2100,7 +2023,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE9:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT10:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS8]], [[NVPTX_WARP_SIZE9]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT10]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 @@ -2235,7 +2157,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 @@ -2338,7 +2259,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // 
CHECK3-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[B_ADDR]], align 4 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP9]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 @@ -2368,56 +2288,27 @@ void unreachable_call() { // // // CHECK3-LABEL: define {{[^@]+}}@_Z3baziRd -// CHECK3-SAME: (i32 [[F3:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { +// CHECK3-SAME: (i32 [[F1:%.*]], double* nonnull align 8 dereferenceable(8) [[A:%.*]]) #[[ATTR5]] { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[RETVAL:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F2:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[A_ADDR:%.*]] = alloca double*, align 4 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-NEXT: [[TMP1:%.*]] = call i16 @__kmpc_parallel_level(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]]) -// CHECK3-NEXT: [[TMP2:%.*]] = icmp eq i16 [[TMP1]], 0 -// CHECK3-NEXT: [[TMP3:%.*]] = call i8 @__kmpc_is_spmd_exec_mode() #[[ATTR2]] -// CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTSPMD:%.*]], label [[DOTNON_SPMD:%.*]] -// CHECK3: .spmd: -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .non-spmd: -// CHECK3-NEXT: [[TMP5:%.*]] = select i1 [[TMP2]], i32 4, i32 128 -// CHECK3-NEXT: [[TMP6:%.*]] = call i8* @__kmpc_data_sharing_coalesced_push_stack(i32 [[TMP5]], i16 0) -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: [[_SELECT_STACK:%.*]] = phi %struct._globalized_locals_ty* [ null, [[DOTSPMD]] ], [ [[TMP7]], [[DOTNON_SPMD]] ] -// 
CHECK3-NEXT: [[TMP8:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[F:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[_SELECT_STACK]], i32 0, i32 0 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID]], 31 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [32 x i32], [32 x i32]* [[F]], i32 0, i32 [[NVPTX_LANE_ID]] -// CHECK3-NEXT: [[F1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP8]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = select i1 [[TMP2]], i32* [[F1]], i32* [[TMP9]] -// CHECK3-NEXT: [[TMP11:%.*]] = select i1 [[TMP4]], i32* [[F2]], i32* [[TMP10]] -// CHECK3-NEXT: store i32 [[F3]], i32* [[TMP11]], align 4 +// CHECK3-NEXT: [[F:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[F_ON_STACK:%.*]] = bitcast i8* [[F]] to i32* +// CHECK3-NEXT: store i32 [[F1]], i32* [[F_ON_STACK]], align 4 // CHECK3-NEXT: store double* [[A]], double** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load double*, double** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP11]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[TMP12]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), 
i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP17]], i32 2) -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[RETVAL]], align 4 -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTEXIT5:%.*]], label [[DOTNON_SPMD4:%.*]] -// CHECK3: .non-spmd4: -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast %struct._globalized_locals_ty* [[_SELECT_STACK]] to i8* -// CHECK3-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP19]]) -// CHECK3-NEXT: br label [[DOTEXIT5]] -// CHECK3: .exit5: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[RETVAL]], align 4 -// CHECK3-NEXT: ret i32 [[TMP20]] +// CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[F_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast double* [[TMP1]] to i8* +// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP0]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, double*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP6]], i32 2) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[F_ON_STACK]], align 4 +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[F]]) +// CHECK3-NEXT: ret i32 [[TMP7]] // // // CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z16unreachable_callv_l142_worker @@ -2482,7 +2373,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // 
CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: call void @_Z6asserti(i32 0) #[[ATTR8:[0-9]+]] // CHECK3-NEXT: unreachable // CHECK3: 5: @@ -2565,7 +2455,6 @@ void unreachable_call() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP6]], 1 // CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 diff --git a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp index 3bc7c0b640086..37b0e7ee02db9 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_codegen.cpp @@ -60,7 +60,6 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -110,7 +109,6 @@ int bar(int n){ // CHECK1-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void 
@__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -173,7 +171,6 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -223,7 +220,6 @@ int bar(int n){ // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -286,7 +282,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -336,7 +331,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -399,7 +393,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -449,7 +442,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -512,7 +504,6 @@ int bar(int n){ // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK5: .execute: // CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -562,7 +553,6 @@ int bar(int n){ // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // 
CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK5: .execute: // CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -625,7 +615,6 @@ int bar(int n){ // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK6: .execute: // CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -675,7 +664,6 @@ int bar(int n){ // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK6: .execute: // CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) diff --git a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp index 2a08878ca32ed..1ae8a6de4b144 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_num_threads_codegen.cpp @@ -55,7 +55,6 @@ int bar(int n){ // CHECK1-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() 
// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -108,7 +107,6 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -172,7 +170,6 @@ int bar(int n){ // CHECK2-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -224,7 +221,6 @@ int bar(int n){ // CHECK2-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -288,7 +284,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: 
call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -340,7 +335,6 @@ int bar(int n){ // CHECK3-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -404,7 +398,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 8 // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -457,7 +450,6 @@ int bar(int n){ // CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i32* // CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK4: .execute: // CHECK4-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -521,7 +513,6 @@ int bar(int n){ // CHECK5-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: call void 
@__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK5: .execute: // CHECK5-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -573,7 +564,6 @@ int bar(int n){ // CHECK5-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK5: .execute: // CHECK5-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) @@ -637,7 +627,6 @@ int bar(int n){ // CHECK6-NEXT: [[TMP0:%.*]] = load i16*, i16** [[AA_ADDR]], align 4 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK6: .execute: // CHECK6-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -689,7 +678,6 @@ int bar(int n){ // CHECK6-NEXT: [[TMP2:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 // CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK6: .execute: // CHECK6-NEXT: [[TMP3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) diff --git a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp index 
7b3de7d462d29..572657f29f1ee 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_proc_bind_codegen.cpp @@ -55,7 +55,6 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l29}}( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // // CHECK: [[EXEC]] @@ -73,7 +72,6 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l33}}( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // // CHECK: [[EXEC]] @@ -91,7 +89,6 @@ int bar(int n){ // CHECK-LABEL: define {{.*}}void {{@__omp_offloading_.+template.+l38}}( // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXEC:.+]] // // CHECK: [[EXEC]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp index a69a9f90a8394..9c5ad319b6812 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen.cpp @@ -55,7 +55,6 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l27}}( // // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // // CHECK: [[EXECUTE]] @@ -239,7 +238,6 @@ int bar(int n){ // CHECK: define {{.*}}void {{@__omp_offloading_.+template.+l32}}( // // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // // CHECK: [[EXECUTE]] @@ -501,7 +499,6 @@ int bar(int n){ // CHECK: define 
{{.*}}void {{@__omp_offloading_.+template.+l38}}( // // CHECK: call void @__kmpc_spmd_kernel_init(i32 {{.+}}, i16 1) -// CHECK: call void @__kmpc_data_sharing_init_stack_spmd // CHECK: br label {{%?}}[[EXECUTE:.+]] // // CHECK: [[EXECUTE]] diff --git a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp index 293fa02004c16..23a7837d43314 100644 --- a/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp +++ b/clang/test/OpenMP/nvptx_target_parallel_reduction_codegen_tbaa_PR46146.cpp @@ -59,7 +59,7 @@ void test() { // CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) // CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2:[0-9]+]] // CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK1: .check.next: // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -88,7 +88,7 @@ void test() { // CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR5]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR2]] // CHECK1-NEXT: br label [[DOTEXIT:%.*]] // CHECK1: .mastercheck: // CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -105,10 +105,9 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 
[[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6:![0-9]+]] -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK1: .termination.notifier: // CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -133,133 +132,131 @@ void test() { // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12:![0-9]+]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8, !tbaa [[TBAA14:![0-9]+]] -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* 
@"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 2 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, 
i32* [[TMP11]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK1-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* +// CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// 
CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: 
omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP23]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR2]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR2]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa 
[[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa 
[[TBAA12]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) 
#[[ATTR5]] -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ -// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 
dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] // 
CHECK1-NEXT: ret void // // @@ -289,95 +286,95 @@ void test() { // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// 
CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 
[[TMP9]], [[TMP10]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] +// 
CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: store float 
0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR2]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR2]] +// CHECK1-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], 
align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[COND]], 
i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -386,65 +383,65 @@ void test() { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, 
!tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float -// CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA16]] +// CHECK1-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK1-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float -// CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]] -// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], 
%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]] +// CHECK1-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 
// CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 @@ -453,55 +450,55 @@ void test() { // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* 
[[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E -// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 { +// 
CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR7]] +// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]] +// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] -// CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] -// CHECK1-NEXT: [[TMP2:%.*]] = load 
%"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR7]] +// CHECK1-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] +// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]] +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] -// CHECK1-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK1-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK1-NEXT: ret %"class.std::complex"* [[THIS1]] // // @@ -514,15 +511,15 @@ void test() { // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i16 [[TMP1]], 
i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 @@ -539,7 +536,7 @@ void test() { // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK1-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -556,7 +553,7 @@ void test() { // CHECK1: then: // CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR2]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -589,8 +586,8 @@ void test() { // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -598,10 +595,10 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 // CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: @@ -610,7 +607,7 @@ void test() { // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], 
label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -621,23 +618,23 @@ void test() { // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, 
!tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] // CHECK1: ifcont6: // CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void @@ -651,20 +648,20 @@ void test() { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** -// 
CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // @@ -691,10 +688,10 @@ void test() { // CHECK1: .execute.parallel: // CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*) +// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) // CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]] +// CHECK1-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] // CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK1: .check.next: // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -723,7 +720,7 @@ void test() { // CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR5]] +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR2]] // CHECK1-NEXT: br label 
[[DOTEXIT:%.*]] // CHECK1: .mastercheck: // CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -740,10 +737,9 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK1: .termination.notifier: // CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -768,144 +764,142 @@ void test() { // CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size4", align 8, !tbaa [[TBAA14]] -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* 
@"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK1-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 2 -// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: 
[[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK1-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK1-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK1-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* 
+// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP19:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP19]]) #[[ATTR5]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR2]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]] 
+// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR2]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP23]], 4 -// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 8, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK1-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK1-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.1"*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK1-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], 
i8** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ -// 
CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK1-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 
dereferenceable(8) [[TMP1]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]] // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 // CHECK1-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK1-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -916,103 +910,103 @@ void test() { // CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK1-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 
// CHECK1-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK1-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 // CHECK1-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store %"class.std::complex.1"* [[PARTIAL_SUM]], %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP1:%.*]] = 
load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK1-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK1-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] // CHECK1: omp.precond.then: // CHECK1-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 
4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR2]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR2]] // CHECK1-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] // CHECK1-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// 
CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK1: cond.true: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END:%.*]] // CHECK1: cond.false: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[COND_END]] // CHECK1: cond.end: // CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa 
[[TBAA6]] -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK1-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK1-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -1021,126 +1015,126 @@ void test() { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK1-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK1: omp.inner.for.cond.cleanup: // CHECK1-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP39:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK1-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK1-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 
dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]] -// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]] +// CHECK1-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]] +// CHECK1-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] // CHECK1-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: 
[[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK1: omp.dispatch.end: // CHECK1-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* +// CHECK1-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* // CHECK1-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK1-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) // CHECK1-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK1-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK1-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 
dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK1: .omp.reduction.done: // CHECK1-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK1-NEXT: br label [[OMP_PRECOND_END]] // CHECK1: omp.precond.end: // CHECK1-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 
4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK1-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E -// CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK1-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// 
CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR7]] -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK1-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR7]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// 
CHECK1-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK1-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK1-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] -// CHECK1-NEXT: ret %"class.std::complex.1"* [[THIS1]] +// CHECK1-NEXT: ret %"class.std::complex.0"* [[THIS1]] // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 // CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 @@ -1148,24 +1142,24 @@ void test() { // CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.1", align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// 
CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.1"* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.1", %"class.std::complex.1"* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i64* -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK1-NEXT: [[TMP16:%.*]] = 
bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK1: .shuffle.pre_cond: // CHECK1-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] @@ -1187,8 +1181,8 @@ void test() { // CHECK1-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK1-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK1: .shuffle.exit: -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK1-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK1-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1205,7 +1199,7 @@ void test() { // CHECK1: then: // CHECK1-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR2]] // CHECK1-NEXT: br label [[IFCONT:%.*]] // CHECK1: else: // CHECK1-NEXT: br label [[IFCONT]] @@ -1219,10 +1213,10 @@ void test() { // CHECK1-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 // CHECK1-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK1-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.1"* -// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"* -// CHECK1-NEXT: [[TMP54:%.*]] = bitcast 
%"class.std::complex.1"* [[TMP53]] to i8* -// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8* +// CHECK1-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* +// CHECK1-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK1-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: @@ -1231,15 +1225,15 @@ void test() { // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 // CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -1247,10 +1241,10 @@ void test() { // CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x 
i8*]* -// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND:%.*]] // CHECK1: precond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 // CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK1: body: @@ -1259,7 +1253,7 @@ void test() { // CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK1: then: // CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -1270,29 +1264,29 @@ void test() { // CHECK1-NEXT: br label [[IFCONT]] // CHECK1: ifcont: // CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK1: then4: // CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK1-NEXT: [[TMP15:%.*]] = getelementptr 
inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[IFCONT6:%.*]] // CHECK1: else5: // CHECK1-NEXT: br label [[IFCONT6]] // CHECK1: ifcont6: // CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: br label [[PRECOND]] // CHECK1: exit: // CHECK1-NEXT: ret void // // -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper // CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 @@ -1300,105 +1294,105 @@ void test() { // CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK1-NEXT: call 
void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.1"** -// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.1"* [[TMP11]]) #[[ATTR5]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK1-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR2]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ -// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) 
[[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]] -// CHECK1-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK1-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[TMP3:%.*]] = load 
float, float* [[TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK1-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK1-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK1-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv -// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK1-NEXT: ret float [[TMP0]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv -// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: // CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** 
[[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK1-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK1-NEXT: ret float [[TMP0]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ -// CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK1-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK1-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* 
[[THIS1]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK1-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK1-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK1-NEXT: ret void // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv -// CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// 
CHECK1-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK1-NEXT: ret double [[TMP0]] // // // CHECK1-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv -// CHECK1-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK1-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK1-NEXT: entry: -// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK1-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK1-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK1-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK1-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, 
%"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK1-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK1-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK1-NEXT: ret double [[TMP0]] // @@ -1429,7 +1423,7 @@ void test() { // CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) // CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2:[0-9]+]] // CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK2: .check.next: // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -1458,7 +1452,7 @@ void test() { // CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR5]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR2]] // CHECK2-NEXT: br label [[DOTEXIT:%.*]] // CHECK2: .mastercheck: // CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -1475,10 +1469,9 @@ void test() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store 
i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6:![0-9]+]] -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK2: .termination.notifier: // CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -1503,133 +1496,131 @@ void test() { // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12:![0-9]+]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8, !tbaa [[TBAA14:![0-9]+]] -// CHECK2-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], 
%struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 2 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK2-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* +// CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// 
CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP19:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16:![0-9]+]] -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]] -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw 
i32 [[TMP23]], 4 -// CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR2]] +// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR2]] +// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK2-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// 
CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK2-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: 
[[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP26]], i64 3) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// 
CHECK2-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP39:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ -// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 
dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] // CHECK2-NEXT: ret void // // @@ -1659,95 +1650,95 @@ void test() { // CHECK2-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], 
i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) 
#[[ATTR5]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// 
CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] -// 
CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR2]] +// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK2-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// 
CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR2]] +// CHECK2-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -1756,65 +1747,65 @@ void test() { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] +// CHECK2-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float -// CHECK2-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA16]] +// CHECK2-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK2-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float -// CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]] -// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]] +// CHECK2-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] +// CHECK2-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) 
[[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, 
i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK2-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* // CHECK2-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 @@ -1823,55 
+1814,55 @@ void test() { // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call 
void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E -// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 
8 // CHECK2-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR7]] +// CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]] +// CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] -// CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] -// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR7]] +// CHECK2-NEXT: store float [[ADD]], float* [[__RE_]], 
align 4, !tbaa [[TBAA16]] +// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]] +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] -// CHECK2-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK2-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK2-NEXT: ret %"class.std::complex"* [[THIS1]] // // @@ -1884,15 +1875,15 @@ void test() { // CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa 
[[TBAA12]] // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 @@ -1909,7 +1900,7 @@ void test() { // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK2-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -1926,7 +1917,7 @@ void test() { // CHECK2: then: // CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR2]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // 
CHECK2-NEXT: br label [[IFCONT]] @@ -1959,8 +1950,8 @@ void test() { // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -1968,10 +1959,10 @@ void test() { // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 // CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: @@ -1980,7 +1971,7 @@ void test() { // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // 
CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -1991,23 +1982,23 @@ void test() { // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] // CHECK2: ifcont6: // CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP20]], 
i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void @@ -2021,20 +2012,20 @@ void test() { // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** -// CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] +// CHECK2-NEXT: 
[[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: ret void // // @@ -2061,10 +2052,10 @@ void test() { // CHECK2: .execute.parallel: // CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*) +// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) // CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]] +// CHECK2-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] // CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK2: .check.next: // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -2093,7 +2084,7 @@ void test() { // CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR5]] +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR2]] // CHECK2-NEXT: br label [[DOTEXIT:%.*]] // CHECK2: .mastercheck: // CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -2110,10 +2101,9 @@ void test() { // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 
[[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK2: .termination.notifier: // CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -2138,144 +2128,142 @@ void test() { // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size4", align 8, !tbaa [[TBAA14]] -// CHECK2-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// 
CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK2-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 2 -// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa 
[[TBAA6]] -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK2-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK2-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK2-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK2-NEXT: call void 
@llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP19:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP19]]) #[[ATTR5]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR2]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]] +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR2]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// 
CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]] -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP23]], 4 -// CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 8, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK2-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK2-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK2-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.1"*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK2-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] 
to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] 
to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP39:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ -// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK2-SAME: 
(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK2-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK2-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) 
#[[ATTR8]] // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 // CHECK2-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK2-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -2286,103 +2274,103 @@ void test() { // CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK2-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 // CHECK2-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK2-NEXT: 
[[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 // CHECK2-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store %"class.std::complex.1"* [[PARTIAL_SUM]], %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to 
i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[SUB:%.*]] = 
sub i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK2-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK2-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK2: omp.precond.then: // CHECK2-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] 
+// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: 
call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR2]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: [[TMP23:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR2]] // CHECK2-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] // CHECK2-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP28:%.*]] = load 
i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK2: cond.true: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END:%.*]] // CHECK2: cond.false: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[COND_END]] // CHECK2: cond.end: // CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: 
[[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK2-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK2-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -2391,126 +2379,126 @@ void test() { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK2-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK2: omp.inner.for.cond.cleanup: // CHECK2-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP40:%.*]] = bitcast 
%"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK2-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK2-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]] -// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull 
align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]] +// CHECK2-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR8]] +// CHECK2-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] // CHECK2-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // 
CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK2: omp.dispatch.end: // CHECK2-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] 
// CHECK2-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* +// CHECK2-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* // CHECK2-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 // CHECK2-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK2-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) // CHECK2-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK2-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK2-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK2: .omp.reduction.done: // CHECK2-NEXT: [[TMP62:%.*]] = bitcast i32* 
[[I7]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] +// CHECK2-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK2-NEXT: br label [[OMP_PRECOND_END]] // CHECK2: omp.precond.end: // CHECK2-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK2-NEXT: call void 
@llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK2-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E -// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK2-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK2-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR7]] -// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: 
[[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK2-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store %"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK2-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR7]] -// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: 
[[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK2-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK2-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] -// CHECK2-NEXT: ret %"class.std::complex.1"* [[THIS1]] +// CHECK2-NEXT: ret %"class.std::complex.0"* [[THIS1]] // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 // CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 @@ -2518,24 +2506,24 @@ void test() { // CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.1", align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to 
[1 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.1"* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.1", %"class.std::complex.1"* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i64* -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK2: .shuffle.pre_cond: // CHECK2-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] @@ -2557,8 +2545,8 @@ void test() { // 
CHECK2-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK2-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK2: .shuffle.exit: -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK2-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK2-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -2575,7 +2563,7 @@ void test() { // CHECK2: then: // CHECK2-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR2]] // CHECK2-NEXT: br label [[IFCONT:%.*]] // CHECK2: else: // CHECK2-NEXT: br label [[IFCONT]] @@ -2589,10 +2577,10 @@ void test() { // CHECK2-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 // CHECK2-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK2-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.1"* -// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"* -// CHECK2-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.1"* [[TMP53]] to i8* -// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8* +// CHECK2-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* +// CHECK2-NEXT: [[TMP54:%.*]] = 
bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK2-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: @@ -2601,15 +2589,15 @@ void test() { // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 // CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -2617,10 +2605,10 @@ void test() { // CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND:%.*]] // CHECK2: precond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: 
[[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 // CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK2: body: @@ -2629,7 +2617,7 @@ void test() { // CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK2: then: // CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -2640,29 +2628,29 @@ void test() { // CHECK2-NEXT: br label [[IFCONT]] // CHECK2: ifcont: // CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK2: then4: // CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK2-NEXT: [[TMP18:%.*]] = 
getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[IFCONT6:%.*]] // CHECK2: else5: // CHECK2-NEXT: br label [[IFCONT6]] // CHECK2: ifcont6: // CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: br label [[PRECOND]] // CHECK2: exit: // CHECK2-NEXT: ret void // // -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper // CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 @@ -2670,105 +2658,105 @@ void test() { // CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, 
i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.1"** -// CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.1"* [[TMP11]]) #[[ATTR5]] +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK2-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR2]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ -// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca float*, 
align 8 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]] -// CHECK2-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK2-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] +// CHECK2-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK2-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK2-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] +// 
CHECK2-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv -// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK2-NEXT: ret float [[TMP0]] // // // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv -// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK2-NEXT: entry: // CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds 
%"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK2-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK2-NEXT: ret float [[TMP0]] // // // CHECK2-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ -// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK2-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK2-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK2-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa 
[[TBAA12]] +// CHECK2-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK2-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK2-NEXT: ret void // // // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv -// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK2-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", 
%"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK2-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK2-NEXT: ret double [[TMP0]] // // // CHECK2-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv -// CHECK2-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK2-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK2-NEXT: entry: -// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK2-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK2-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK2-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK2-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK2-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK2-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK2-NEXT: ret double [[TMP0]] // @@ -2799,7 +2787,7 @@ void 
test() { // CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) // CHECK3-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK3: .execute.fn: -// CHECK3-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5:[0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2:[0-9]+]] // CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK3: .check.next: // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -2828,7 +2816,7 @@ void test() { // CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR5]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIfEvv_l19_worker() #[[ATTR2]] // CHECK3-NEXT: br label [[DOTEXIT:%.*]] // CHECK3: .mastercheck: // CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -2845,10 +2833,9 @@ void test() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6:![0-9]+]] -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8:![0-9]+]] +// CHECK3-NEXT: call void @__omp_outlined__(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK3: .termination.notifier: // CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -2873,133 +2860,131 @@ void test() { // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[REF_TMP2:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10:![0-9]+]] -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12:![0-9]+]] -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8, !tbaa [[TBAA14:![0-9]+]] -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 2 
-// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12:![0-9]+]] +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[ISTART:%.*]] = 
call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK3-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex"* +// CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: 
br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR7:[0-9]+]] -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast float* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP21]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP23]], 4 -// CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP14:%.*]] = 
bitcast float* [[REF_TMP]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP14]]) #[[ATTR2]] +// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14:![0-9]+]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP15]]) #[[ATTR2]] +// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM_ON_STACK]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP2]]) #[[ATTR8:[0-9]+]] +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast float* [[REF_TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast float* [[REF_TMP]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK3-NEXT: store i8* 
[[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex"*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** 
[[TMP26]], i64 3) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK3-NEXT: 
call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC1ERKfS2_ -// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3:[0-9]+]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4:[0-9]+]] comdat align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// 
CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR7]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC2ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS1]], float* nonnull align 4 dereferenceable(4) [[TMP0]], float* nonnull align 4 dereferenceable(4) [[TMP1]]) #[[ATTR8]] // CHECK3-NEXT: ret void // // @@ -3029,95 +3014,95 @@ void test() { // CHECK3-NEXT: [[REF_TMP15:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[REF_TMP16:%.*]] = alloca float, align 4 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store 
%"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store %"class.std::complex"* [[PARTIAL_SUM]], %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// 
CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 
[[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] 
+// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR5]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA16]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP22]]) #[[ATTR2]] +// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP]], align 4, !tbaa [[TBAA14]] // CHECK3-NEXT: [[TMP23:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR5]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA16]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* 
nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR7]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP23]]) #[[ATTR2]] +// CHECK3-NEXT: store float 0.000000e+00, float* [[REF_TMP6]], align 4, !tbaa [[TBAA14]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], float* nonnull align 4 dereferenceable(4) [[REF_TMP]], float* nonnull align 4 dereferenceable(4) [[REF_TMP6]]) #[[ATTR8]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast float* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP24]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa 
[[TBAA8]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK3-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -3126,65 +3111,65 @@ void test() { // CHECK3: 
omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK3-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP40]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP41:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP41]]) 
#[[ATTR2]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to float -// CHECK3-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA16]] +// CHECK3-NEXT: store float [[CONV]], float* [[REF_TMP15]], align 4, !tbaa [[TBAA14]] // CHECK3-NEXT: [[TMP43:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP43]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to float -// CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA16]] -// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR7]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR7]] +// CHECK3-NEXT: store float [[CONV17]], float* [[REF_TMP16]], align 4, !tbaa [[TBAA14]] +// CHECK3-NEXT: call void @_ZNSt7complexIfEC1ERKfS2_(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[REF_TMP14]], float* nonnull align 4 dereferenceable(4) [[REF_TMP15]], float* nonnull align 4 dereferenceable(4) [[REF_TMP16]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]], %"class.std::complex"* 
nonnull align 4 dereferenceable(8) [[REF_TMP14]]) #[[ATTR8]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast float* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP45]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast float* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP46]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP47]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa 
[[TBAA6]] -// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: // CHECK3-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 // CHECK3-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* // CHECK3-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 @@ -3193,55 +3178,55 @@ void test() { // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* 
@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 4 dereferenceable(8) %"class.std::complex"* @_ZNSt7complexIfEpLIfEERS0_RKS_IT_E(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP63]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK3-NEXT: call 
void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEpLIfEERS0_RKS_IT_E -// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR4:[0-9]+]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], %"class.std::complex"* nonnull align 4 dereferenceable(8) [[__C:%.*]]) #[[ATTR5:[0-9]+]] comdat align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK3-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] 
+// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store %"class.std::complex"* [[__C]], %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR7]] +// CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[CALL:%.*]] = call float @_ZNKSt7complexIfE4realEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP0]]) #[[ATTR8]] // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18:![0-9]+]] +// CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd float [[TMP1]], [[CALL]] -// CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] -// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR7]] +// CHECK3-NEXT: store float [[ADD]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] +// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[CALL2:%.*]] = call float @_ZNKSt7complexIfE4imagEv(%"class.std::complex"* nonnull align 4 dereferenceable(8) [[TMP2]]) #[[ATTR8]] // CHECK3-NEXT: 
[[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20:![0-9]+]] +// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd float [[TMP3]], [[CALL2]] -// CHECK3-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK3-NEXT: store float [[ADD3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK3-NEXT: ret %"class.std::complex"* [[THIS1]] // // @@ -3254,15 +3239,15 @@ void test() { // CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex", align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19:![0-9]+]] +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 
2, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 @@ -3279,7 +3264,7 @@ void test() { // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 // CHECK3-NEXT: [[TMP22:%.*]] = bitcast %"class.std::complex"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -3296,7 +3281,7 @@ void test() { // CHECK3: then: // CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR5]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR2]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -3329,8 +3314,8 @@ void test() { // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** 
[[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -3338,10 +3323,10 @@ void test() { // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 // CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: @@ -3350,7 +3335,7 @@ void test() { // CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -3361,23 
+3346,23 @@ void test() { // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] // CHECK3: ifcont6: // CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void @@ -3391,20 +3376,20 @@ void test() { // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // 
CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex"** -// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR5]] +// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex"* [[TMP11]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // @@ -3431,10 +3416,10 
@@ void test() { // CHECK3: .execute.parallel: // CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*) +// CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*) // CHECK3-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] // CHECK3: .execute.fn: -// CHECK3-NEXT: call void @__omp_outlined__5_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR5]] +// CHECK3-NEXT: call void @__omp_outlined__3_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR2]] // CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] // CHECK3: .check.next: // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* @@ -3463,7 +3448,7 @@ void test() { // CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] // CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] // CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR5]] +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z17complex_reductionIdEvv_l19_worker() #[[ATTR2]] // CHECK3-NEXT: br label [[DOTEXIT:%.*]] // CHECK3: .mastercheck: // CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() @@ -3480,10 +3465,9 @@ void test() { // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i32 [[TMP5]], i32* 
[[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR5]] +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR2]] // CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] // CHECK3: .termination.notifier: // CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) @@ -3508,144 +3492,142 @@ void test() { // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[REF_TMP2:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size4", align 8, !tbaa [[TBAA14]] -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[ISTART:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 -// 
CHECK3-NEXT: [[IEND:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 2 -// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP7]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP9]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP10]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: 
[[CMP:%.*]] = icmp sgt i32 [[TMP13]], 99 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[ISTART:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ISTART_ON_STACK:%.*]] = bitcast i8* [[ISTART]] to i32* +// CHECK3-NEXT: [[IEND:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[IEND_ON_STACK:%.*]] = bitcast i8* [[IEND]] to i32* +// CHECK3-NEXT: [[PARTIAL_SUM:%.*]] = call i8* @__kmpc_alloc_shared(i64 16) +// CHECK3-NEXT: [[PARTIAL_SUM_ON_STACK:%.*]] = bitcast i8* [[PARTIAL_SUM]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP0:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP1:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP1]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP2:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP2]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP5]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP8]], 99 // CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP14]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP9]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP10]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP11]], [[TMP12]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP13]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP19]]) #[[ATTR5]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP14]]) #[[ATTR2]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22:![0-9]+]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP20]]) #[[ATTR5]] +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP15]]) #[[ATTR2]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP2]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR7]] -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast double* [[REF_TMP2]] to i8* -// 
CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP21]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP23]], 4 -// CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART]], align 8, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], 1 +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM_ON_STACK]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP2]]) #[[ATTR8]] +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[REF_TMP2]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP16]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast double* [[REF_TMP]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP17]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[TMP18]], 4 +// CHECK3-NEXT: store i32 [[MUL3]], i32* [[ISTART_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[IB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP19]], 1 // CHECK3-NEXT: [[MUL5:%.*]] = mul nsw i32 [[ADD4]], 4 -// CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[ISTART]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IEND]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP31:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP12]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i32*, %"class.std::complex.1"*)* @__omp_outlined__5 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** [[TMP31]], i64 3) +// CHECK3-NEXT: store i32 [[MUL5]], i32* [[IEND_ON_STACK]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast i32* [[ISTART_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i32* [[IEND_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, 
i32*, %"class.std::complex.0"*)* @__omp_outlined__3 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** [[TMP26]], i64 3) // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP27]], 1 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP12]]) -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[IB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP34]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP35]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP36:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP36]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP37]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP38:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP38]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i16, i16* 
@"_openmp_static_kernel$is_shared3", align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP39]]) +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]]) +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[IB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP28]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP29:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP29]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP30]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP31]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP32]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP33]]) #[[ATTR2]] +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[PARTIAL_SUM]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[IEND]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ISTART]]) // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC1ERKdS2_ -// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca 
%"class.std::complex.1"*, align 8 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK3-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8 // CHECK3-NEXT: [[TMP1:%.*]] = load double*, double** [[__IM_ADDR]], align 8 -// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR7]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC2ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS1]], double* nonnull align 8 dereferenceable(8) [[TMP0]], double* nonnull align 8 dereferenceable(8) [[TMP1]]) #[[ATTR8]] // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], 
%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ISTART:%.*]], i32* nonnull align 4 dereferenceable(4) [[IEND:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[ISTART_ADDR:%.*]] = alloca i32*, align 8 // CHECK3-NEXT: [[IEND_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK3-NEXT: [[PARTIAL_SUM_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -3656,103 +3638,103 @@ void test() { // CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK3-NEXT: [[PARTIAL_SUM5:%.*]] = alloca %"class.std::complex.0", align 8 // CHECK3-NEXT: [[REF_TMP:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[REF_TMP6:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[I7:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.1", align 8 +// CHECK3-NEXT: [[REF_TMP14:%.*]] = alloca %"class.std::complex.0", align 8 // CHECK3-NEXT: [[REF_TMP15:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[REF_TMP16:%.*]] = alloca double, align 8 // CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store %"class.std::complex.1"* [[PARTIAL_SUM]], %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[ISTART]], i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32* [[IEND]], i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store %"class.std::complex.0"* [[PARTIAL_SUM]], %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ISTART_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[IEND_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[PARTIAL_SUM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP3]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* 
[[TMP4]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP6]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP8]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[SUB:%.*]] = sub i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: [[SUB3:%.*]] = sub i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add i32 [[SUB3]], 1 // CHECK3-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1 // CHECK3-NEXT: [[SUB4:%.*]] = sub i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa 
[[TBAA6]] +// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP11]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[I]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP13:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP13]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] // CHECK3: omp.precond.then: // CHECK3-NEXT: [[TMP16:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP16]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) 
#[[ATTR5]] -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP17]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP19:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP19]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR5]] -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP20]]) #[[ATTR2]] +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP21:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP21]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP22]]) #[[ATTR2]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: [[TMP23:%.*]] = bitcast double* 
[[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP23]]) #[[ATTR2]] // CHECK3-NEXT: store double 0.000000e+00, double* [[REF_TMP6]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR7]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], double* nonnull align 8 dereferenceable(8) [[REF_TMP]], double* nonnull align 8 dereferenceable(8) [[REF_TMP6]]) #[[ATTR8]] // CHECK3-NEXT: [[TMP24:%.*]] = bitcast double* [[REF_TMP6]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP24]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast double* [[REF_TMP]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP25]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP26:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 4, i8* [[TMP26]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB2]], i32 [[TMP28]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) // CHECK3-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] // CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CMP8:%.*]] = icmp ugt i32 [[TMP29]], [[TMP30]] // CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK3: cond.true: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END:%.*]] // CHECK3: cond.false: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[COND_END]] // CHECK3: cond.end: // CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE]] ], [ [[TMP32]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP35]], 1 // CHECK3-NEXT: [[CMP10:%.*]] = icmp ult i32 [[TMP34]], [[ADD9]] // CHECK3-NEXT: br i1 [[CMP10]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]] @@ -3761,126 +3743,126 @@ void test() { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD11:%.*]] = add i32 [[TMP37]], 1 // CHECK3-NEXT: [[CMP12:%.*]] = icmp ult i32 [[TMP36]], [[ADD11]] // CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]] // CHECK3: omp.inner.for.cond.cleanup: // CHECK3-NEXT: br label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP39]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add i32 [[TMP38]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR5]] +// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I7]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP40:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] 
to i8* +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 16, i8* [[TMP40]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP41:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP41]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP42]] to double // CHECK3-NEXT: store double [[CONV]], double* [[REF_TMP15]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: [[TMP43:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: call void @llvm.lifetime.start.p0i8(i64 8, i8* [[TMP43]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[I7]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[CONV17:%.*]] = sitofp i32 [[TMP44]] to double // CHECK3-NEXT: store double [[CONV17]], double* [[REF_TMP16]], align 8, !tbaa [[TBAA22]] -// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) [[REF_TMP16]]) #[[ATTR7]] -// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR7]] +// CHECK3-NEXT: call void @_ZNSt7complexIdEC1ERKdS2_(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]], double* nonnull align 8 dereferenceable(8) [[REF_TMP15]], double* nonnull align 8 dereferenceable(8) 
[[REF_TMP16]]) #[[ATTR8]] +// CHECK3-NEXT: [[CALL:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[REF_TMP14]]) #[[ATTR8]] // CHECK3-NEXT: [[TMP45:%.*]] = bitcast double* [[REF_TMP16]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP45]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP46:%.*]] = bitcast double* [[REF_TMP15]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.1"* [[REF_TMP14]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 8, i8* [[TMP46]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP47:%.*]] = bitcast %"class.std::complex.0"* [[REF_TMP14]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP47]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD18:%.*]] = add i32 [[TMP48]], 1 -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD18]], i32* [[DOTOMP_IV]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP49]], [[TMP50]] -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_LB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[ADD20:%.*]] = add i32 [[TMP51]], [[TMP52]] -// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_UB]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] // CHECK3: omp.dispatch.end: // CHECK3-NEXT: [[TMP53:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[TMP53]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB2]], i32 [[TMP54]]) // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[TMP55]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP57:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* +// CHECK3-NEXT: [[TMP58:%.*]] = bitcast %"class.std::complex.0"* 
[[PARTIAL_SUM5]] to i8* // CHECK3-NEXT: store i8* [[TMP58]], i8** [[TMP57]], align 8 // CHECK3-NEXT: [[TMP59:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func7, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func8) +// CHECK3-NEXT: [[TMP60:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP56]], i32 1, i64 8, i8* [[TMP59]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6) // CHECK3-NEXT: [[TMP61:%.*]] = icmp eq i32 [[TMP60]], 1 // CHECK3-NEXT: br i1 [[TMP61]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] // CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.1"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR7]] +// CHECK3-NEXT: [[CALL21:%.*]] = call nonnull align 8 dereferenceable(16) %"class.std::complex.0"* @_ZNSt7complexIdEpLIdEERS0_RKS_IT_E(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[PARTIAL_SUM5]]) #[[ATTR8]] // CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP56]]) // CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK3: .omp.reduction.done: // CHECK3-NEXT: [[TMP62:%.*]] = bitcast i32* [[I7]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR5]] -// CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.1"* [[PARTIAL_SUM5]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR5]] +// 
CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP62]]) #[[ATTR2]] +// CHECK3-NEXT: [[TMP63:%.*]] = bitcast %"class.std::complex.0"* [[PARTIAL_SUM5]] to i8* +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 16, i8* [[TMP63]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP64:%.*]] = bitcast i32* [[DOTOMP_IS_LAST]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP64]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP65:%.*]] = bitcast i32* [[DOTOMP_STRIDE]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP65]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP66:%.*]] = bitcast i32* [[DOTOMP_UB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP66]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP67:%.*]] = bitcast i32* [[DOTOMP_LB]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP67]]) #[[ATTR2]] // CHECK3-NEXT: br label [[OMP_PRECOND_END]] // CHECK3: omp.precond.end: // CHECK3-NEXT: [[TMP68:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_2]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP68]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP69:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_1]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP69]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP70:%.*]] = bitcast i32* [[DOTCAPTURE_EXPR_]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP70]]) #[[ATTR2]] 
// CHECK3-NEXT: [[TMP71:%.*]] = bitcast i32* [[DOTOMP_IV]] to i8* -// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR5]] +// CHECK3-NEXT: call void @llvm.lifetime.end.p0i8(i64 4, i8* [[TMP71]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEpLIdEERS0_RKS_IT_E -// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], %"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[__C:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK3-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK3-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store %"class.std::complex.1"* [[__C]], %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR7]] -// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK3-NEXT: [[__C_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store 
%"class.std::complex.0"* [[__C]], %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[CALL:%.*]] = call double @_ZNKSt7complexIdE4realEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP0]]) #[[ATTR8]] +// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24:![0-9]+]] // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], [[CALL]] // CHECK3-NEXT: store double [[ADD]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[__C_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR7]] -// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[__C_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[CALL2:%.*]] = call double @_ZNKSt7complexIdE4imagEv(%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[TMP2]]) #[[ATTR8]] +// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26:![0-9]+]] // CHECK3-NEXT: [[ADD3:%.*]] = fadd double [[TMP3]], [[CALL2]] // CHECK3-NEXT: store double [[ADD3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] -// CHECK3-NEXT: ret %"class.std::complex.1"* [[THIS1]] +// 
CHECK3-NEXT: ret %"class.std::complex.0"* [[THIS1]] // // -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func7 +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 // CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 @@ -3888,24 +3870,24 @@ void test() { // CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 // CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.1", align 8 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca %"class.std::complex.0", align 8 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA12]] +// 
CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2, !tbaa [[TBAA19]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.1"* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.1", %"class.std::complex.1"* [[TMP12]], i64 1 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.1"* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.1"* [[TMP12]] to i64* -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr %"class.std::complex.0", %"class.std::complex.0"* [[TMP12]], i64 1 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast %"class.std::complex.0"* [[TMP13]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast %"class.std::complex.0"* [[TMP12]] to i64* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i64* // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND:%.*]] // CHECK3: .shuffle.pre_cond: // CHECK3-NEXT: [[TMP17:%.*]] = phi i64* [ [[TMP15]], [[ENTRY:%.*]] ], [ [[TMP28:%.*]], [[DOTSHUFFLE_THEN:%.*]] ] @@ -3927,8 +3909,8 @@ void test() { // CHECK3-NEXT: [[TMP29]] = getelementptr i64, i64* [[TMP18]], i64 1 // CHECK3-NEXT: br label [[DOTSHUFFLE_PRE_COND]] // CHECK3: .shuffle.exit: -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.1"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* 
[[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast %"class.std::complex.0"* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP11]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP31:%.*]] = icmp eq i16 [[TMP8]], 0 // CHECK3-NEXT: [[TMP32:%.*]] = icmp eq i16 [[TMP8]], 1 // CHECK3-NEXT: [[TMP33:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] @@ -3945,7 +3927,7 @@ void test() { // CHECK3: then: // CHECK3-NEXT: [[TMP43:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* // CHECK3-NEXT: [[TMP44:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func6"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR5]] +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP43]], i8* [[TMP44]]) #[[ATTR2]] // CHECK3-NEXT: br label [[IFCONT:%.*]] // CHECK3: else: // CHECK3-NEXT: br label [[IFCONT]] @@ -3959,10 +3941,10 @@ void test() { // CHECK3-NEXT: [[TMP49:%.*]] = load i8*, i8** [[TMP48]], align 8 // CHECK3-NEXT: [[TMP50:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 // CHECK3-NEXT: [[TMP51:%.*]] = load i8*, i8** [[TMP50]], align 8 -// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.1"* -// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.1"* -// CHECK3-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.1"* [[TMP53]] to i8* -// CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.1"* [[TMP52]] to i8* +// CHECK3-NEXT: [[TMP52:%.*]] = bitcast i8* [[TMP49]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP53:%.*]] = bitcast i8* [[TMP51]] to %"class.std::complex.0"* +// CHECK3-NEXT: [[TMP54:%.*]] = bitcast %"class.std::complex.0"* [[TMP53]] to i8* +// CHECK3-NEXT: [[TMP55:%.*]] = bitcast %"class.std::complex.0"* [[TMP52]] to i8* // CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 8 [[TMP54]], i8* align 8 [[TMP55]], i64 16, i1 false), !tbaa.struct !27 // 
CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: @@ -3971,15 +3953,15 @@ void test() { // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func8 +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 // CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 // CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() // CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 @@ -3987,10 +3969,10 @@ void test() { // CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 // CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND:%.*]] // CHECK3: precond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 4 // CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] // CHECK3: body: @@ -3999,7 +3981,7 @@ void test() { // CHECK3-NEXT: br 
i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] // CHECK3: then: // CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* // CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] @@ -4010,29 +3992,29 @@ void test() { // CHECK3-NEXT: br label [[IFCONT]] // CHECK3: ifcont: // CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] // CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] // CHECK3: then4: // CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] // CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA6]] -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: [[TMP19:%.*]] = load volatile 
i32, i32 addrspace(3)* [[TMP14]], align 4, !tbaa [[TBAA8]] +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[IFCONT6:%.*]] // CHECK3: else5: // CHECK3-NEXT: br label [[IFCONT6]] // CHECK3: ifcont6: // CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: br label [[PRECOND]] // CHECK3: exit: // CHECK3-NEXT: ret void // // -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5_wrapper +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3_wrapper // CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 @@ -4040,105 +4022,105 @@ void test() { // CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 // CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA12]] -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA6]] +// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2, !tbaa [[TBAA19]] +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4, !tbaa [[TBAA8]] // CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) // CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 // CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 // CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 1 // CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8** [[TMP6]] to i32** -// 
CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[TMP7]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 2 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.1"** -// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[TMP10]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: call void @__omp_outlined__5(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.1"* [[TMP11]]) #[[ATTR5]] +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8** [[TMP9]] to %"class.std::complex.0"** +// CHECK3-NEXT: [[TMP11:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[TMP10]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]], i32* [[TMP8]], %"class.std::complex.0"* [[TMP11]]) #[[ATTR2]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIfEC2ERKfS2_ -// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]], float* nonnull align 4 dereferenceable(4) [[__RE:%.*]], float* nonnull align 4 dereferenceable(4) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca float*, align 8 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca float*, align 8 -// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa 
[[TBAA10]] -// CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store float* [[__RE]], float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store float* [[__IM]], float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA16]] -// CHECK3-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK3-NEXT: [[TMP0:%.*]] = load float*, float** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP1:%.*]] = load float, float* [[TMP0]], align 4, !tbaa [[TBAA14]] +// CHECK3-NEXT: store float [[TMP1]], float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA16]] -// CHECK3-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK3-NEXT: [[TMP2:%.*]] = load float*, float** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[TMP2]], align 4, !tbaa [[TBAA14]] +// CHECK3-NEXT: store float [[TMP3]], float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4realEv -// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) 
#[[ATTR4]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA18]] +// CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__RE_]], align 4, !tbaa [[TBAA16]] // CHECK3-NEXT: ret float [[TMP0]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIfE4imagEv -// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex"* nonnull align 4 dereferenceable(8) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK3-NEXT: entry: // CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex"*, align 8 -// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store %"class.std::complex"* [[THIS]], %"class.std::complex"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex"*, %"class.std::complex"** [[THIS_ADDR]], align 8 // CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex", %"class.std::complex"* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA20]] +// CHECK3-NEXT: [[TMP0:%.*]] = load float, float* [[__IM_]], align 4, !tbaa [[TBAA18]] // CHECK3-NEXT: ret 
float [[TMP0]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZNSt7complexIdEC2ERKdS2_ -// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR3]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]], double* nonnull align 8 dereferenceable(8) [[__RE:%.*]], double* nonnull align 8 dereferenceable(8) [[__IM:%.*]]) unnamed_addr #[[ATTR4]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 // CHECK3-NEXT: [[__RE_ADDR:%.*]] = alloca double*, align 8 // CHECK3-NEXT: [[__IM_ADDR:%.*]] = alloca double*, align 8 -// CHECK3-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store double* [[__RE]], double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: store double* [[__IM]], double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds 
%"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[__RE_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[TMP0]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: store double [[TMP1]], double* [[__RE_]], align 8, !tbaa [[TBAA24]] -// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA10]] +// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load double*, double** [[__IM_ADDR]], align 8, !tbaa [[TBAA12]] // CHECK3-NEXT: [[TMP3:%.*]] = load double, double* [[TMP2]], align 8, !tbaa [[TBAA22]] // CHECK3-NEXT: store double [[TMP3]], double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK3-NEXT: ret void // // // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4realEv -// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK3-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 0 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: 
[[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[__RE_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 0 // CHECK3-NEXT: [[TMP0:%.*]] = load double, double* [[__RE_]], align 8, !tbaa [[TBAA24]] // CHECK3-NEXT: ret double [[TMP0]] // // // CHECK3-LABEL: define {{[^@]+}}@_ZNKSt7complexIdE4imagEv -// CHECK3-SAME: (%"class.std::complex.1"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR4]] comdat align 2 { +// CHECK3-SAME: (%"class.std::complex.0"* nonnull align 8 dereferenceable(16) [[THIS:%.*]]) #[[ATTR5]] comdat align 2 { // CHECK3-NEXT: entry: -// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.1"*, align 8 -// CHECK3-NEXT: store %"class.std::complex.1"* [[THIS]], %"class.std::complex.1"** [[THIS_ADDR]], align 8, !tbaa [[TBAA10]] -// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.1"*, %"class.std::complex.1"** [[THIS_ADDR]], align 8 -// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.1", %"class.std::complex.1"* [[THIS1]], i32 0, i32 1 +// CHECK3-NEXT: [[THIS_ADDR:%.*]] = alloca %"class.std::complex.0"*, align 8 +// CHECK3-NEXT: store %"class.std::complex.0"* [[THIS]], %"class.std::complex.0"** [[THIS_ADDR]], align 8, !tbaa [[TBAA12]] +// CHECK3-NEXT: [[THIS1:%.*]] = load %"class.std::complex.0"*, %"class.std::complex.0"** [[THIS_ADDR]], align 8 +// CHECK3-NEXT: [[__IM_:%.*]] = getelementptr inbounds %"class.std::complex.0", %"class.std::complex.0"* [[THIS1]], i32 0, i32 1 // CHECK3-NEXT: [[TMP0:%.*]] = load double, double* [[__IM_]], align 8, !tbaa [[TBAA26]] // CHECK3-NEXT: ret double [[TMP0]] // diff --git a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp index 0dc6cddad1d9d..d456991be463d 100644 --- a/clang/test/OpenMP/nvptx_target_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_codegen.cpp @@ -118,7 +118,6 @@ int bar(int 
n){ // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 8 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[A_CASTED]] to i8* @@ -218,7 +217,6 @@ int bar(int n){ // CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 8 // CHECK1-NEXT: [[CONV7:%.*]] = bitcast i64* [[AA_CASTED]] to i16* @@ -261,7 +259,6 @@ int bar(int n){ // CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[AA_ADDR]] to i16* // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) @@ -404,7 +401,6 @@ int bar(int n){ // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // 
CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i32* [[A_CASTED]] to i8* @@ -504,7 +500,6 @@ int bar(int n){ // CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 4 // CHECK2-NEXT: [[CONV7:%.*]] = bitcast i32* [[AA_CASTED]] to i16* @@ -547,7 +542,6 @@ int bar(int n){ // CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK2: .execute: // CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) @@ -690,7 +684,6 @@ int bar(int n){ // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP6:%.*]] = load i8, i8* [[CONV]], align 4 // CHECK3-NEXT: [[CONV7:%.*]] = bitcast i32* [[A_CASTED]] to i8* @@ -790,7 +783,6 @@ int bar(int n){ // CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() // CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] // CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() // CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) // CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[CONV]], align 4 // CHECK3-NEXT: [[CONV7:%.*]] = bitcast i32* [[AA_CASTED]] to i16* @@ -833,7 +825,6 @@ int bar(int n){ // CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[AA_ADDR]] to i16* // CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() // CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK3: .execute: // CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp index ddaa298454ed5..3a6716a0eba9a 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_codegen.cpp @@ -2,12 +2,9 @@ // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path 
%t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -35,583 +32,6 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker -// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to 
i8*) -// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK1: .execute.fn: -// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .check.next: -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 -// CHECK1-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: 
[[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], 
i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[I]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to 
i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP15]], i64 1) -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// 
CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker -// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: 
[[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK2: .execute.fn: -// CHECK2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .check.next: -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 -// CHECK2-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: 
[[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK2-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP1]], i32 0, i32 0 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP12]], i64 1) -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] 
-// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP0]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 -// 
CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker -// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK3: .await.work: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK3: .select.workers: -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK3: .execute.parallel: -// 
CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK3-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK3: .execute.fn: -// CHECK3-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK3: .check.next: -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK3-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK3: .terminate.parallel: -// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK3: .barrier.parallel: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 -// CHECK3-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK3: 
.worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .mastercheck: -// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK3: .master: -// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK3: .termination.notifier: -// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], 
i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: 
store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[I]] to i8* -// CHECK3-NEXT: store i8* 
[[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP15]], i32 1) -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// 
CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker // CHECK4-SAME: () #[[ATTR0:[0-9]+]] { // CHECK4-NEXT: entry: @@ -652,8 +72,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTAWAIT_WORK]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 // CHECK4-SAME: () #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: @@ -695,491 +113,1024 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT]] // CHECK4: .exit: // CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// 
CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 +// CHECK4-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) +// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK4-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = 
load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i32* [[I]] to i8* +// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* 
[[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP15]], i32 1) +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) +// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK4-NEXT: store i32 [[INC]], i32* 
[[TMP0]], align 4 +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = 
icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: .execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK5: .execute.fn: +// CHECK5-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .check.next: +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 +// CHECK5-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: +// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// 
CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK5-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct._globalized_locals_ty* +// CHECK5-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP1]], i32 0, i32 0 +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK5: omp.inner.for.cond: +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK5: omp.inner.for.body: +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* 
@__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP12]], i32 1) +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK5: omp.body.continue: +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK5: omp.inner.for.inc: +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK5: omp.inner.for.end: +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK5: omp.loop.exit: +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP0]]) +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: 
[[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK5-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker +// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK6: .await.work: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK6: .select.workers: +// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 
[[TMP3]], 0 +// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK6: .execute.parallel: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK6-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK6: .execute.fn: +// CHECK6-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK6: .check.next: +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK6-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK6: .terminate.parallel: +// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK6: .barrier.parallel: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 +// CHECK6-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK6-NEXT: [[TMP0:%.*]] = icmp 
ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK6: .worker: +// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .mastercheck: +// CHECK6-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK6: .master: +// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK6-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK6: .termination.notifier: +// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTEXIT]] +// CHECK6: .exit: +// 
CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK6-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct._globalized_locals_ty* +// CHECK6-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP1]], i32 0, i32 0 +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 +// CHECK6-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK6: cond.true: +// CHECK6-NEXT: br label [[COND_END:%.*]] +// CHECK6: cond.false: +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: br label [[COND_END]] +// CHECK6: cond.end: +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK6: omp.inner.for.cond: +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] +// CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK6: omp.inner.for.body: +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 +// CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* +// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP12]], i32 1) +// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK6: omp.body.continue: +// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK6: omp.inner.for.inc: +// CHECK6-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK6: omp.inner.for.end: +// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK6: omp.loop.exit: +// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP0]]) +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK6-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK6-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK6-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK6-NEXT: store i32 
[[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK6-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], 
align 8 +// CHECK1-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK1-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK1: .execute.fn: +// CHECK1-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .check.next: +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* 
@"_openmp_static_kernel$size", align 4 -// CHECK4-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP1]], i16 [[TMP0]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 0 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP7]], 9 -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP8]], [[COND_FALSE]] ] -// 
CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i32* [[I]] to i8* -// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP6]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP15]], i32 1) -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]]) -// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 +// CHECK1-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] 
+// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK4-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK4-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: [[I:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: 
[[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i64 1) +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK4-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK1-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK5-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK5: .execute.fn: -// CHECK5-NEXT: call void 
@__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .check.next: -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK5: .terminate.parallel: -// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK1-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i64 0 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 8 +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 -// CHECK5-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// 
CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: .worker: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: [[TMP5:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** 
[[WORK_FN]], align 4 +// CHECK2-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK2-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK2: .execute.fn: +// CHECK2-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .check.next: +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 +// CHECK2-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker() #[[ATTR3]] +// CHECK2-NEXT: br label 
[[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// 
CHECK5-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK5-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct._globalized_locals_ty* -// CHECK5-NEXT: [[I:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP1]], i32 0, i32 0 -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ 
[[TMP5]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP12]], i32 1) -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK5: omp.loop.exit: -// 
CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP0]]) -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: [[I:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK2-NEXT: br i1 [[CMP]], label 
[[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i32 1) +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK5-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], 
i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK2-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK5-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK5-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK2-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK2-NEXT: store 
i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker -// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK6: .await.work: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK6: .select.workers: -// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK6: .execute.parallel: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** 
[[WORK_FN]], align 4 -// CHECK6-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) -// CHECK6-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] -// CHECK6: .execute.fn: -// CHECK6-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK6: .check.next: -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK6-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL]] -// CHECK6: .terminate.parallel: -// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK6: .barrier.parallel: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK3: .await.work: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK3: .select.workers: +// 
CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK3: .execute.parallel: +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2:[0-9]+]]) +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: [[WORK_MATCH:%.*]] = icmp eq i8* [[TMP5]], bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*) +// CHECK3-NEXT: br i1 [[WORK_MATCH]], label [[DOTEXECUTE_FN:%.*]], label [[DOTCHECK_NEXT:%.*]] +// CHECK3: .execute.fn: +// CHECK3-NEXT: call void @__omp_outlined__1_wrapper(i16 0, i32 [[TMP4]]) #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK3: .check.next: +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK3-NEXT: call void [[TMP6]](i16 0, i32 [[TMP4]]) +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL]] +// CHECK3: .terminate.parallel: +// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK3: .barrier.parallel: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19 -// CHECK6-SAME: () #[[ATTR1:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// 
CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK6: .worker: -// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l19_worker() #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .mastercheck: -// CHECK6-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK6: .master: -// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK6-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) -// CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK6: .termination.notifier: -// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK6-NEXT: 
call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTEXIT]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16 +// CHECK3-SAME: () #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK3: .worker: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l16_worker() #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .mastercheck: +// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK3: .master: +// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// 
CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB2]]) +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK3: .termination.notifier: +// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTEXIT]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK6-NEXT: [[TMP1:%.*]] = bitcast i8* [[TMP0]] to %struct._globalized_locals_ty* -// CHECK6-NEXT: [[I:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP1]], i32 0, i32 0 -// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 -// CHECK6-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP3]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK6-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP4]], 9 -// CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK6: cond.true: -// CHECK6-NEXT: br label [[COND_END:%.*]] -// CHECK6: cond.false: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: br label [[COND_END]] -// CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP5]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] -// CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i32* [[I]] to i8* -// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP3]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP12]], i32 1) -// CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK6: omp.body.continue: -// CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK6: omp.inner.for.end: -// CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK6: omp.loop.exit: -// CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]]) -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP0]]) -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [1 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: [[I:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[I_ON_STACK:%.*]] = bitcast i8* [[I]] to i32* +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP1]], i32 92, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP2]], 9 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP3]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = 
icmp sle i32 [[TMP5]], [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I_ON_STACK]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i32* [[I_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast [1 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB2]], i32 [[TMP1]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*)* @__omp_outlined__1 to i8*), i8* bitcast (void (i16, i32)* @__omp_outlined__1_wrapper to i8*), i8** [[TMP10]], i32 1) +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[I]]) +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 
-// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 -// CHECK6-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[I:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[I_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[I]], i32** [[I_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[I_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[INC:%.*]] = add nsw i32 [[TMP1]], 1 +// CHECK3-NEXT: store i32 [[INC]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper -// CHECK6-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 -// CHECK6-NEXT: store i32 0, i32* 
[[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) -// CHECK6-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** -// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1_wrapper +// CHECK3-SAME: (i16 zeroext [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[GLOBAL_ARGS:%.*]] = alloca i8**, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i16 [[TMP0]], i16* [[DOTADDR]], align 2 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: call void @__kmpc_get_shared_variables(i8*** [[GLOBAL_ARGS]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i8**, i8*** [[GLOBAL_ARGS]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8*, i8** [[TMP2]], i32 0 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8** [[TMP3]] to i32** +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[TMP4]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTADDR1]], i32* [[DOTZERO_ADDR]], i32* [[TMP5]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp index 5e5f06d2852c6..4256228fdea9a 100644 --- 
a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_codegen.cpp @@ -1,26 +1,11 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple 
i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK6 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK7 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK8 - // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK9 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK10 -// 
RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - | FileCheck %s --check-prefix=CHECK11 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK12 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -fopenmp-optimistic-collapse -o - | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK13 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK14 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path 
%t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK15 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK16 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 // expected-no-diagnostics #ifndef HEADER @@ -86,6438 +71,6 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to 
i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca 
[1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* 
@"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[L2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* 
[[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP26]] to i8* -// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i64 5) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END16:%.*]] -// CHECK1: cond.false15: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END16]] -// CHECK1: cond.end16: -// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE14]] ], [ [[TMP49]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP52:%.*]] = 
load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP55]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: [[TMP56:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP56]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
[[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// 
CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 
-// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK1: 
omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br 
i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, 
i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK1-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: 
.exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 
[[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] 
= bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK1: cond.true5: -// CHECK1-NEXT: br label [[COND_END7:%.*]] -// CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END7]] -// CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load 
i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store 
i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], 
i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK1-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] 
to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK1: cond.true7: -// CHECK1-NEXT: br label [[COND_END9:%.*]] -// CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END9]] -// CHECK1: cond.end9: -// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// 
CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add 
nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], 
[[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK1-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__8(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* 
[[N_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 
[[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[CONV12:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK1-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK1-NEXT: 
br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV15:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP31]], i64 4) -// 
CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] -// CHECK1-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i64 [[ADD18]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] -// CHECK1: cond.true20: -// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: br label [[COND_END22:%.*]] -// CHECK1: cond.false21: -// CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: br label [[COND_END22]] -// CHECK1: cond.end22: -// CHECK1-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE20]] ], [ [[TMP41]], [[COND_FALSE21]] ] -// CHECK1-NEXT: store i64 [[COND23]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP42]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 
-// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: land.lhs.true: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, 
[[TMP6]] -// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 -// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 1, 
[[DIV14]] -// CHECK1-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 -// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP15]], [[CONV16]] -// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] -// CHECK1-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV19]], i32* [[I10]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] -// CHECK1-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 -// CHECK1-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[CONV23]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK1-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 -// CHECK1-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] -// CHECK1-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 -// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] -// CHECK1-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP17]], [[MUL29]] -// CHECK1-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 -// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] -// CHECK1-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 -// CHECK1-NEXT: store i32 [[CONV33]], i32* [[J11]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK1-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 
0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM35]] -// CHECK1-NEXT: store i32 [[ADD34]], i32* [[ARRAYIDX36]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK1-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK1-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* 
[[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: 
[[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] 
-// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to 
i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP43:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// 
CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 
[[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca 
i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* 
nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[L2:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label 
[[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** 
[[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP23]] to i8* -// CHECK2-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP36]], i64 5) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: 
[[ADD11:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK2: cond.true14: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label [[COND_END16:%.*]] -// CHECK2: cond.false15: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END16]] -// CHECK2: cond.end16: -// CHECK2-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE14]] ], [ [[TMP46]], [[COND_FALSE15]] ] -// CHECK2-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK2-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] 
-// CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP52]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK2: .omp.lastprivate.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 
[[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK2: omp.dispatch.body: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: 
omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: 
[[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK2: .omp.lastprivate.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// 
CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 
-// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END13:%.*]] -// CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END13]] -// CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// 
CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* 
[[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: 
omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK2-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK2-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, 
align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: 
[[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: 
omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK2: cond.true5: -// CHECK2-NEXT: br label [[COND_END7:%.*]] -// CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END7]] -// CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 
1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK2-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK2-NEXT: 
store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = 
load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK2: cond.true7: -// CHECK2-NEXT: br label [[COND_END9:%.*]] -// CHECK2: cond.false8: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END9]] -// CHECK2: cond.end9: -// CHECK2-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK2-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* 
nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK2-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// 
CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK2-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__8 -// 
CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK2-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK2-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[CONV12:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK2-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV15:%.*]] = bitcast 
i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP31]], i64 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] -// CHECK2-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: 
[[TMP34:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i64 [[ADD18]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] -// CHECK2: cond.true20: -// CHECK2-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: br label [[COND_END22:%.*]] -// CHECK2: cond.false21: -// CHECK2-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: br label [[COND_END22]] -// CHECK2: cond.end22: -// CHECK2-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE20]] ], [ [[TMP41]], [[COND_FALSE21]] ] -// CHECK2-NEXT: store i64 [[COND23]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK2-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP42]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: 
ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** 
[[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK2-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] -// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK2-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: land.lhs.true: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 -// CHECK2-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK2-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK2-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 -// CHECK2-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] -// CHECK2-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 -// CHECK2-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP15]], [[CONV16]] -// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] -// CHECK2-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK2-NEXT: store i32 [[CONV19]], i32* [[I10]], align 4 -// CHECK2-NEXT: 
[[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK2-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 -// CHECK2-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] -// CHECK2-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 -// CHECK2-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[CONV23]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK2-NEXT: [[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 -// CHECK2-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] -// CHECK2-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 -// CHECK2-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] -// CHECK2-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP17]], [[MUL29]] -// CHECK2-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 -// CHECK2-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] -// CHECK2-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 -// CHECK2-NEXT: store i32 [[CONV33]], i32* [[J11]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK2-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK2-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK2-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM35]] -// CHECK2-NEXT: store i32 [[ADD34]], i32* [[ARRAYIDX36]], align 4 -// CHECK2-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK2-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK2-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca 
i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] 
to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* -// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], 
i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END13:%.*]] -// CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END13]] -// CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: 
store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 
1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 -// 
CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK3-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x 
i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca 
i64, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, 
i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[L2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 
-// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP23]], i32* [[CONV8]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK3-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[CONV9]], align 4 -// 
CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to i8* -// CHECK3-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: [[TMP34:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK3-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP26]] to i8* -// CHECK3-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i64 5) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] -// CHECK3-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK3: cond.true14: -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: br label [[COND_END16:%.*]] -// CHECK3: cond.false15: -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END16]] -// CHECK3: cond.end16: -// CHECK3-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE14]] ], [ [[TMP49]], [[COND_FALSE15]] ] -// CHECK3-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK3-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) -// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK3-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP55:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK3-NEXT: store i32 [[TMP55]], i32* [[CONV1]], align 8 -// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: [[TMP56:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP56]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* 
[[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK3-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK3: omp.dispatch.body: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 -// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK3-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 
4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 
-// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 
x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END13:%.*]] -// CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END13]] -// CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] 
-// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 
-// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK3-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK3-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK3-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK3-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = 
add nsw i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK3-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// 
-// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: 
store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: 
store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK3-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP10:%.*]] 
= load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK3-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label 
[[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// 
CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK3-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK3: cond.true7: -// CHECK3-NEXT: br label [[COND_END9:%.*]] -// CHECK3: cond.false8: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END9]] -// CHECK3: cond.end9: -// CHECK3-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK3-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// 
CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 
x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK3-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK3-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK3-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 
[[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK3-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* 
[[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I8:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J9:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// 
CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 -// CHECK3-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK3-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i64 4) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK3-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK3: cond.true17: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: br label [[COND_END19:%.*]] -// CHECK3: cond.false18: -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END19]] -// CHECK3: cond.end19: -// CHECK3-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] -// CHECK3-NEXT: store i32 [[COND20]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP45:%.*]] = load i32*, 
i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// 
CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] -// CHECK3-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 -// CHECK3-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: land.lhs.true: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK3-NEXT: store i32 [[CONV8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[CONV9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CONV12:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP13:%.*]] = icmp ule i64 [[CONV12]], [[TMP14]] -// CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK3-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] -// CHECK3-NEXT: 
[[DIV17:%.*]] = sdiv i32 [[TMP15]], [[MUL16]] -// CHECK3-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I10]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK3-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK3-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] -// CHECK3-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP18]], [[MUL21]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK3-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK3-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] -// CHECK3-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV22]], [[MUL25]] -// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP17]], [[MUL26]] -// CHECK3-NEXT: [[MUL28:%.*]] = mul nsw i32 [[SUB27]], 1 -// CHECK3-NEXT: [[ADD29:%.*]] = add nsw i32 0, [[MUL28]] -// CHECK3-NEXT: store i32 [[ADD29]], i32* [[J11]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK3-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK3-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK3-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM31]] -// CHECK3-NEXT: store i32 [[ADD30]], i32* [[ARRAYIDX32]], align 
4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK3-NEXT: store i32 [[ADD33]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK3-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// 
CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 
0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK3-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK3-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END13:%.*]] -// CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END13]] -// CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK3-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 
[[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK3-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK3-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// 
CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK4-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* 
-// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca 
[1000 x i32]*, align 8 -// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK4-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[L2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub 
nsw i32 [[TMP4]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK4-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK4-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK4-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP31:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP23]] to i8* -// CHECK4-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP36]], i64 5) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] -// 
CHECK4-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] -// CHECK4-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK4: cond.true14: -// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: br label [[COND_END16:%.*]] -// CHECK4: cond.false15: -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END16]] -// CHECK4: cond.end16: -// CHECK4-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE14]] ], [ [[TMP46]], [[COND_FALSE15]] ] -// CHECK4-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK4-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK4-NEXT: store i32 [[TMP52]], i32* [[CONV1]], align 8 -// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// 
CHECK4-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// 
CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* 
[[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK4: omp.dispatch.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK4-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK4-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK4-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK4: omp.dispatch.body: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: 
omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK4-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK4-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK4: omp.dispatch.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK4: omp.dispatch.end: -// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// 
CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK4: .omp.lastprivate.then: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK4-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 -// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK4: .omp.lastprivate.done: -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK4-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// 
CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* 
[[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: 
[[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK4-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK4: cond.true11: -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: br label [[COND_END13:%.*]] -// CHECK4: cond.false12: -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END13]] -// CHECK4: cond.end13: -// CHECK4-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK4-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// 
CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// 
CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP7]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK4-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK4-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK4-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK4-SAME: ([10 x i32]* nonnull align 4 
dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK4-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK4: cond.true5: -// CHECK4-NEXT: br label [[COND_END7:%.*]] -// CHECK4: cond.false6: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END7]] -// CHECK4: cond.end7: -// CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK4-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK4-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// 
CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK4-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: 
.omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[CONV]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK4-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK4: cond.true7: -// CHECK4-NEXT: br label [[COND_END9:%.*]] -// CHECK4: cond.false8: -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END9]] -// CHECK4: cond.end9: -// CHECK4-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK4-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = 
alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK4-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK4-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 
4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK4-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK4-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK4-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// 
CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK4-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK4-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: 
[[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// 
CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I8:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J9:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// 
CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] -// CHECK4-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 -// CHECK4-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], 
[[TMP11]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* -// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK4-NEXT: 
[[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i64 4) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK4-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK4-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK4-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK4-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] -// CHECK4: cond.true17: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: br label [[COND_END19:%.*]] -// CHECK4: cond.false18: -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END19]] -// CHECK4: cond.end19: -// CHECK4-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] -// CHECK4-NEXT: store i32 [[COND20]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I10:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[J11:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP2]], i32* 
[[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] -// CHECK4-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 -// CHECK4-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: land.lhs.true: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP8]] to i32 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP9]] to i32 -// CHECK4-NEXT: store i32 [[CONV8]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV9]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV12:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP13:%.*]] = icmp ule i64 [[CONV12]], [[TMP14]] -// CHECK4-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK4-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK4-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] -// CHECK4-NEXT: [[DIV17:%.*]] = sdiv i32 [[TMP15]], [[MUL16]] -// CHECK4-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I10]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK4-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 -// CHECK4-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] -// CHECK4-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP18]], [[MUL21]] -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK4-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK4-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 -// CHECK4-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] -// CHECK4-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV22]], [[MUL25]] -// CHECK4-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP17]], [[MUL26]] -// CHECK4-NEXT: [[MUL28:%.*]] = mul nsw i32 [[SUB27]], 1 -// CHECK4-NEXT: [[ADD29:%.*]] = add nsw i32 0, [[MUL28]] -// CHECK4-NEXT: store i32 [[ADD29]], i32* [[J11]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK4-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 -// CHECK4-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP24]] to i64 -// CHECK4-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM31]] -// CHECK4-NEXT: store i32 [[ADD30]], i32* [[ARRAYIDX32]], align 4 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK4-NEXT: store i32 [[ADD33]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] 
= load i32, i32* [[TMP27]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK4-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK4-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK4: .execute: -// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// 
CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK4: .omp.deinit: -// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK4-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// 
CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// 
CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK4: cond.true: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: br label [[COND_END:%.*]] -// CHECK4: cond.false: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END]] -// CHECK4: cond.end: -// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK4-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK4-NEXT: store i8* 
[[TMP22]], i8** [[TMP21]], align 8 -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 -// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK4-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* -// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK4-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] -// CHECK4-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK4: cond.true11: -// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: br label [[COND_END13:%.*]] -// CHECK4: cond.false12: -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: br label [[COND_END13]] -// CHECK4: cond.end13: -// CHECK4-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] -// CHECK4-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK4-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 -// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// 
CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK4: omp.precond.then: -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK4-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK4: omp.inner.for.cond: 
-// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK4-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 -// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK4-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 -// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK4: omp.body.continue: -// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK4: omp.inner.for.end: -// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK4: omp.loop.exit: -// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK4-NEXT: br label [[OMP_PRECOND_END]] -// CHECK4: omp.precond.end: -// CHECK4-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK5-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: @@ -6552,8 +105,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -6707,8 +258,6 @@ int bar(int n){ // CHECK5-NEXT: [[TMP54:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK5-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP54]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -6834,8 +383,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK5-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -6864,8 +411,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// 
-// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -6994,8 +539,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7084,8 +627,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK5-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7108,8 +649,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7201,8 +740,6 @@ int bar(int n){ // CHECK5: omp.loop.exit: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7267,8 +804,6 @@ int bar(int n){ // CHECK5: omp.loop.exit: // 
CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK5-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7297,8 +832,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7402,8 +935,6 @@ int bar(int n){ // CHECK5: omp.loop.exit: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7490,8 +1021,6 @@ int bar(int n){ // CHECK5: omp.loop.exit: // CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK5-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7520,8 +1049,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 
dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7670,8 +1197,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7807,8 +1332,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK5-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7840,8 +1363,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -7976,8 +1497,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -8068,8 +1587,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK6-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK6-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK6-NEXT: entry: @@ -8104,8 +1621,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8259,8 +1774,6 @@ int bar(int n){ // CHECK6-NEXT: [[TMP54:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK6-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP54]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8386,8 +1899,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK6-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8416,8 +1927,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8546,8 +2055,6 @@ int bar(int n){ 
// CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8636,8 +2143,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK6-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8660,8 +2165,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8753,8 +2256,6 @@ int bar(int n){ // CHECK6: omp.loop.exit: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8819,8 +2320,6 @@ int bar(int n){ // CHECK6: omp.loop.exit: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK6-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], 
i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8849,8 +2348,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -8954,8 +2451,6 @@ int bar(int n){ // CHECK6: omp.loop.exit: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9042,8 +2537,6 @@ int bar(int n){ // CHECK6: omp.loop.exit: // CHECK6-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK6-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9072,8 +2565,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9222,8 +2713,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK6-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9359,8 +2848,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK6-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9392,8 +2879,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9528,8 +3013,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -9620,8 +3103,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK7-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: @@ -9656,8 +3137,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // 
CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -9806,8 +3285,6 @@ int bar(int n){ // CHECK7: omp.precond.end: // CHECK7-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -9933,8 +3410,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK7-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -9963,8 +3438,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10093,8 +3566,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // 
CHECK7-NEXT: entry: @@ -10183,8 +3654,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK7-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10207,8 +3676,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10300,8 +3767,6 @@ int bar(int n){ // CHECK7: omp.loop.exit: // CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10366,8 +3831,6 @@ int bar(int n){ // CHECK7: omp.loop.exit: // CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK7-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10396,8 +3859,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 
[[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10501,8 +3962,6 @@ int bar(int n){ // CHECK7: omp.loop.exit: // CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10589,8 +4048,6 @@ int bar(int n){ // CHECK7: omp.loop.exit: // CHECK7-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK7-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10619,8 +4076,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10769,8 +4224,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10906,8 +4359,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: 
define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK7-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -10939,8 +4390,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -11075,8 +4524,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -11167,8 +4614,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK8-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK8-NEXT: entry: @@ -11203,8 +4648,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11353,8 +4796,6 @@ int bar(int n){ // CHECK8: omp.precond.end: // CHECK8-NEXT: call void 
@__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11480,8 +4921,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK8-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11510,8 +4949,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11640,8 +5077,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11730,8 +5165,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK8-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11754,8 
+5187,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11847,8 +5278,6 @@ int bar(int n){ // CHECK8: omp.loop.exit: // CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11913,8 +5342,6 @@ int bar(int n){ // CHECK8: omp.loop.exit: // CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK8-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -11943,8 +5370,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12048,8 +5473,6 @@ int bar(int n){ // CHECK8: omp.loop.exit: // CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 
[[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12136,8 +5559,6 @@ int bar(int n){ // CHECK8: omp.loop.exit: // CHECK8-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK8-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12166,8 +5587,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12316,8 +5735,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12453,8 +5870,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK8-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12486,8 +5901,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define 
{{[^@]+}}@__omp_outlined__10 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12622,8 +6035,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -12714,8 +6125,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK9-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: @@ -12754,8 +6163,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -12915,8 +6322,6 @@ int bar(int n){ // CHECK9-NEXT: [[TMP56:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK9-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP56]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* 
nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13050,8 +6455,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK9-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13082,8 +6485,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13216,8 +6617,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13311,8 +6710,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK9-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13335,8 +6732,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) 
[[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13430,8 +6825,6 @@ int bar(int n){ // CHECK9: omp.loop.exit: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13500,8 +6893,6 @@ int bar(int n){ // CHECK9: omp.loop.exit: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK9-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13532,8 +6923,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13641,8 +7030,6 @@ int bar(int n){ // CHECK9: omp.loop.exit: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13735,8 +7122,6 @@ int bar(int n){ // CHECK9: omp.loop.exit: // CHECK9-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP4]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK9-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13767,8 +7152,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -13917,8 +7300,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -14054,8 +7435,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK9-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -14089,8 +7468,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -14229,8 +7606,6 @@ int bar(int n){ // CHECK9-NEXT: br 
label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -14327,8 +7702,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK10-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK10-NEXT: entry: @@ -14367,8 +7740,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14523,8 +7894,6 @@ int bar(int n){ // CHECK10: omp.precond.end: // CHECK10-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14658,8 +8027,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK10-SAME: (i64 
[[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14690,8 +8057,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14824,8 +8189,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14919,8 +8282,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK10-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -14943,8 +8304,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15038,8 +8397,6 @@ int bar(int n){ // CHECK10: omp.loop.exit: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK10-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15108,8 +8465,6 @@ int bar(int n){ // CHECK10: omp.loop.exit: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK10-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15140,8 +8495,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15249,8 +8602,6 @@ int bar(int n){ // CHECK10: omp.loop.exit: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15343,8 +8694,6 @@ int bar(int n){ // CHECK10: omp.loop.exit: // CHECK10-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK10-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15375,8 
+8724,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15525,8 +8872,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15662,8 +9007,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK10-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15697,8 +9040,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15837,8 +9178,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -15935,8 +9274,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK11-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: @@ -15975,8 +9312,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16136,8 +9471,6 @@ int bar(int n){ // CHECK11-NEXT: [[TMP56:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK11-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP56]]) // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16271,8 +9604,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK11-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16303,8 +9634,6 @@ int bar(int n){ // CHECK11-NEXT: br label 
[[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16437,8 +9766,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16532,8 +9859,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK11-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16556,8 +9881,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16651,8 +9974,6 @@ int bar(int n){ // CHECK11: omp.loop.exit: // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { 
// CHECK11-NEXT: entry: @@ -16721,8 +10042,6 @@ int bar(int n){ // CHECK11: omp.loop.exit: // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK11-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16753,8 +10072,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16862,8 +10179,6 @@ int bar(int n){ // CHECK11: omp.loop.exit: // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16956,8 +10271,6 @@ int bar(int n){ // CHECK11: omp.loop.exit: // CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK11-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -16988,8 +10301,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__8 // 
CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -17137,8 +10448,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -17270,8 +10579,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK11-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -17305,8 +10612,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -17445,8 +10750,6 @@ int bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK11-NEXT: entry: @@ -17543,8 +10846,6 @@ int 
bar(int n){ // CHECK11-NEXT: br label [[OMP_PRECOND_END]] // CHECK11: omp.precond.end: // CHECK11-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK12-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK12-NEXT: entry: @@ -17583,8 +10884,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -17739,8 +11038,6 @@ int bar(int n){ // CHECK12: omp.precond.end: // CHECK12-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -17874,8 +11171,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK12-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -17906,8 +11201,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 
dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18040,8 +11333,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18135,8 +11426,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK12-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18159,8 +11448,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18254,8 +11541,6 @@ int bar(int n){ // CHECK12: omp.loop.exit: // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18324,8 +11609,6 @@ int bar(int n){ // CHECK12: omp.loop.exit: // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK12-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18356,8 +11639,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18465,8 +11746,6 @@ int bar(int n){ // CHECK12: omp.loop.exit: // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18559,8 +11838,6 @@ int bar(int n){ // CHECK12: omp.loop.exit: // CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK12-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18591,8 +11868,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18740,8 +12015,6 @@ int bar(int n){ // 
CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18873,8 +12146,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK12-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -18908,8 +12179,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[DOTEXIT:%.*]] // CHECK12: .exit: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -19048,8 +12317,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK12-NEXT: entry: @@ -19146,8 +12413,6 @@ int bar(int n){ // CHECK12-NEXT: br label [[OMP_PRECOND_END]] // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK13-SAME: (i32 [[N:%.*]], 
[1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK13-NEXT: entry: @@ -19182,8 +12447,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19337,8 +12600,6 @@ int bar(int n){ // CHECK13-NEXT: [[TMP54:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK13-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP54]]) // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19464,8 +12725,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK13-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19494,8 +12753,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19624,8 +12881,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: 
omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19714,8 +12969,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK13-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19738,8 +12991,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19831,8 +13082,6 @@ int bar(int n){ // CHECK13: omp.loop.exit: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19897,8 +13146,6 @@ int bar(int n){ // CHECK13: omp.loop.exit: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 // CHECK13-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) 
#[[ATTR0]] { // CHECK13-NEXT: entry: @@ -19927,8 +13174,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20032,8 +13277,6 @@ int bar(int n){ // CHECK13: omp.loop.exit: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20120,8 +13363,6 @@ int bar(int n){ // CHECK13: omp.loop.exit: // CHECK13-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 // CHECK13-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20150,8 +13391,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__8 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20300,8 +13539,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__9 // CHECK13-SAME: 
(i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20437,8 +13674,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 // CHECK13-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20470,8 +13705,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[DOTEXIT:%.*]] // CHECK13: .exit: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__10 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20606,8 +13839,6 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK13-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK13-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK13-NEXT: entry: @@ -20698,338 +13929,1281 @@ int bar(int n){ // CHECK13-NEXT: br label [[OMP_PRECOND_END]] // CHECK13: omp.precond.end: // CHECK13-NEXT: ret void -// -// // CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 // CHECK14-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, 
align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK14: .execute: +// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK14-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK14: .omp.deinit: +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK14-NEXT: br 
label [[DOTEXIT:%.*]] +// CHECK14: .exit: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// 
CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 +// CHECK14-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) +// CHECK14-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 0 +// CHECK14-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* +// CHECK14-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: omp.precond.then: +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14: cond.true: +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: br label [[COND_END:%.*]] +// CHECK14: cond.false: +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END]] +// CHECK14: cond.end: +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP23]], i32* [[L_CASTED]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK14-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK14-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK14-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP32:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK14-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 +// CHECK14-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP24]] to i8* +// CHECK14-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 +// CHECK14-NEXT: [[TMP37:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, i8* bitcast 
(void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP37]], i32 5) +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] +// CHECK14-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK14: cond.true11: +// CHECK14-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: br label [[COND_END13:%.*]] +// CHECK14: cond.false12: +// CHECK14-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END13]] +// CHECK14: cond.end13: +// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] +// CHECK14-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP48]], i32* 
[[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]]) +// CHECK14-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK14-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK14: .omp.lastprivate.then: +// CHECK14-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK14: .omp.lastprivate.done: +// CHECK14-NEXT: br label [[OMP_PRECOND_END]] +// CHECK14: omp.precond.end: +// CHECK14-NEXT: [[TMP54:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK14-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP54]]) +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: omp.precond.then: 
+// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK14: omp.dispatch.cond: +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14: cond.true: +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: br label [[COND_END:%.*]] +// CHECK14: cond.false: +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END]] +// CHECK14: cond.end: +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 
+// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK14: omp.dispatch.body: +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK14-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK14: omp.body.continue: +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK14: omp.dispatch.inc: +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// 
CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK14: omp.dispatch.end: +// CHECK14-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK14-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK14: .omp.lastprivate.then: +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK14: .omp.lastprivate.done: +// CHECK14-NEXT: br label [[OMP_PRECOND_END]] +// CHECK14: omp.precond.end: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// CHECK14-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32 0, 
i32* [[DOTZERO_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK14: .execute: +// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK14-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK14: .omp.deinit: +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK14-NEXT: br label [[DOTEXIT:%.*]] +// CHECK14: .exit: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = 
alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: omp.precond.then: +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* 
[[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14: cond.true: +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: br label [[COND_END:%.*]] +// CHECK14: cond.false: +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END]] +// CHECK14: cond.end: +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK14-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK14-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK14-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK14-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK14-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK14-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK14: cond.true10: +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: br label [[COND_END12:%.*]] +// CHECK14: cond.false11: +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END12]] +// CHECK14: cond.end12: +// CHECK14-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK14-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK14-NEXT: 
call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK14-NEXT: br label [[OMP_PRECOND_END]] +// CHECK14: omp.precond.end: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: 
[[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: omp.precond.then: +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK14-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK14-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK14-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK14-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK14: omp.body.continue: +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 
[[TMP18]]) +// CHECK14-NEXT: br label [[OMP_PRECOND_END]] +// CHECK14: omp.precond.end: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 +// CHECK14-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK14: .execute: +// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK14-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK14: .omp.deinit: +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK14-NEXT: br label [[DOTEXIT:%.*]] +// CHECK14: .exit: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14: cond.true: +// CHECK14-NEXT: br label [[COND_END:%.*]] +// CHECK14: cond.false: +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END]] +// CHECK14: cond.end: +// CHECK14-NEXT: [[COND:%.*]] = phi 
i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK14-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK14-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK14-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK14: cond.true5: +// CHECK14-NEXT: br label [[COND_END7:%.*]] +// CHECK14: cond.false6: +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END7]] +// CHECK14: cond.end7: +// CHECK14-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK14-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 
[[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK14-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK14: omp.body.continue: +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// 
CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK14-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK14: .execute: +// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK14-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK14: .omp.deinit: +// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK14-NEXT: br label 
[[DOTEXIT:%.*]] +// CHECK14: .exit: +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14: cond.true: +// CHECK14-NEXT: br label [[COND_END:%.*]] +// CHECK14: cond.false: +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END]] +// CHECK14: cond.end: +// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK14-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK14-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK14-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK14-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = 
load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK14-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK14: cond.true6: +// CHECK14-NEXT: br label [[COND_END8:%.*]] +// CHECK14: cond.false7: +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: br label [[COND_END8]] +// CHECK14: cond.end8: +// CHECK14-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK14-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = 
alloca i32, align 4 +// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], 
i32 1, i32 1) +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK14-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK14-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK14-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK14-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK14-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK14-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK14-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK14: omp.body.continue: +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK14: omp.inner.for.end: +// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK14-NEXT: ret void +// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 +// CHECK14-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK14-NEXT: entry: +// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 // CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** 
[[A_ADDR]], align 4 +// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) // CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK14: .execute: -// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK14-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] // CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] // CHECK14: .omp.deinit: // CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK14-NEXT: br label [[DOTEXIT:%.*]] // CHECK14: .exit: // CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK14-SAME: 
(i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J10:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK14-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK14-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 0 -// CHECK14-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK14-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 +// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP1]], i32* 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK14-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK14-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK14-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: land.lhs.true: +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK14: omp.precond.then: -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: store 
i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK14-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK14-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] // CHECK14: 
cond.true: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK14-NEXT: br label [[COND_END:%.*]] // CHECK14: cond.false: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 // CHECK14-NEXT: br label [[COND_END]] // CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK14-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK14-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK14-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, 
i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 // CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 // CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP23]], i32* [[L_CASTED]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK14-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* +// CHECK14-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* // CHECK14-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* // CHECK14-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, 
i32 2 -// CHECK14-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* // CHECK14-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK14-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK14-NEXT: [[TMP32:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK14-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK14-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK14-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP24]] to i8* -// CHECK14-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK14-NEXT: [[TMP37:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP37]], i32 5) -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP42:%.*]] = load 
i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] -// CHECK14-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK14: cond.true11: -// CHECK14-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: br label [[COND_END13:%.*]] -// CHECK14: cond.false12: -// CHECK14-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END13]] -// CHECK14: cond.end13: -// CHECK14-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] -// CHECK14-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK14-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// 
CHECK14-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK14-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK14-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK14-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK14-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK14-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK14: cond.true18: +// CHECK14-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: br label [[COND_END20:%.*]] +// CHECK14: cond.false19: +// CHECK14-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: br label [[COND_END20]] +// CHECK14: cond.end20: +// CHECK14-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK14-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK14-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK14-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 
[[TMP50]]) -// CHECK14-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 -// CHECK14-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK14: .omp.lastprivate.then: -// CHECK14-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK14: .omp.lastprivate.done: +// CHECK14-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) // CHECK14-NEXT: br label [[OMP_PRECOND_END]] // CHECK14: omp.precond.end: -// CHECK14-NEXT: [[TMP54:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK14-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP54]]) // CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca 
i32, align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK14-NEXT: [[J12:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 // CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// 
CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 // CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK14-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK14-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK14-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 // CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK14-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK14: land.lhs.true: 
+// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] // CHECK14: omp.precond.then: -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK14-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK14-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK14-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK14-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 +// CHECK14-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 // CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK14-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// 
CHECK14: omp.dispatch.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK14: cond.true: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: br label [[COND_END:%.*]] -// CHECK14: cond.false: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END]] -// CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK14: omp.dispatch.body: +// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK14-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK14-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK14-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK14-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK14-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK14-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK14-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK14-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK14-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK14-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK14-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK14-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// 
CHECK14-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK14-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK14-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK14-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK14-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK14-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK14-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK14-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK14-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK14-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK14-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK14-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK14-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK14-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK14-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK14-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK14-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK14-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK14-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK14: omp.dispatch.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK14: omp.dispatch.end: -// CHECK14-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK14-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK14: .omp.lastprivate.then: -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 -// CHECK14-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK14: .omp.lastprivate.done: +// 
CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK14: omp.loop.exit: +// CHECK14-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) // CHECK14-NEXT: br label [[OMP_PRECOND_END]] // CHECK14: omp.precond.end: // CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK14-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 +// CHECK14-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 // CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) // CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] @@ -21038,23 +15212,23 @@ int bar(int n){ // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 // CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK14-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] // CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] // CHECK14: .omp.deinit: // CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK14-NEXT: br label [[DOTEXIT:%.*]] // CHECK14: .exit: // CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 
4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 @@ -21066,12 +15240,13 @@ int bar(int n){ // CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 // CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 // CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -21121,72 +15296,75 @@ int bar(int n){ // CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 // CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK14-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK14-NEXT: store i8* [[TMP21]], i8** [[TMP20]], 
align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK14-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK14-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK14-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK14-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK14-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK14-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK14-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK14-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK14-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK14-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK14-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK14-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK14-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK14-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK14-NEXT: store i8* 
[[TMP26]], i8** [[TMP25]], align 4 +// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK14-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* +// CHECK14-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] // CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] // CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 
-// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] // CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] // CHECK14-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] // CHECK14: cond.true10: -// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK14-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 // CHECK14-NEXT: br label [[COND_END12:%.*]] // CHECK14: cond.false11: -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 // CHECK14-NEXT: br label [[COND_END12]] // CHECK14: cond.end12: -// CHECK14-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK14-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] // CHECK14-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: 
omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK14-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) // CHECK14-NEXT: br label [[OMP_PRECOND_END]] // CHECK14: omp.precond.end: // CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { // CHECK14-NEXT: entry: // CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK14-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 // CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 // CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = 
alloca i32, align 4 @@ -21202,8 +15380,9 @@ int bar(int n){ // CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 // CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 // CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 // CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 // CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 @@ -21231,4117 +15410,9360 @@ int bar(int n){ // CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 // CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK14-NEXT: [[TMP14:%.*]] = load 
i16, i16* [[ARRAYIDX]], align 2 -// CHECK14-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK14-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK14-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK14-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK14: omp.body.continue: -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK14-NEXT: br label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.end: -// CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK14-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: call void 
@__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK14: .execute: -// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK14: .omp.deinit: -// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK14-NEXT: br label [[DOTEXIT:%.*]] -// CHECK14: .exit: -// CHECK14-NEXT: ret void -// -// -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK14-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK14: cond.true: -// CHECK14-NEXT: br label [[COND_END:%.*]] -// CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END]] -// CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK14-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK14-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK14-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK14-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK14: cond.true5: -// CHECK14-NEXT: br label [[COND_END7:%.*]] -// CHECK14: cond.false6: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END7]] -// CHECK14: cond.end7: -// CHECK14-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK14-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK14: omp.inner.for.cond: +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK14-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK14: omp.inner.for.body: +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK14-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK14-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK14: omp.body.continue: +// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] +// CHECK14: omp.inner.for.inc: +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK14-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK14-NEXT: br label [[OMP_PRECOND_END]] +// CHECK14: omp.precond.end: // CHECK14-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: 
[[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK15-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK15-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* +// CHECK15-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK15-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK15-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP31:%.*]] = 
inttoptr i32 [[TMP21]] to i8* +// CHECK15-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK15: cond.true11: +// CHECK15-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: br label [[COND_END13:%.*]] +// CHECK15: 
cond.false12: +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END13]] +// CHECK15: cond.end13: +// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] +// CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK15-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK15-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK15: .omp.lastprivate.then: +// CHECK15-NEXT: [[TMP50:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP50]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK15: .omp.lastprivate.done: +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 
[[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK15: omp.dispatch.cond: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// 
CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK15: omp.dispatch.body: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK15-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK15: omp.dispatch.inc: +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK15: omp.dispatch.end: +// CHECK15-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK15-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK15: .omp.lastprivate.then: +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK15-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK15: .omp.lastprivate.done: +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// 
CHECK15-NEXT: entry: +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = 
alloca i32*, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK15-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK15-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK15-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, 
i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK15-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK15: cond.true10: +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: br label [[COND_END12:%.*]] +// CHECK15: cond.false11: +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END12]] +// CHECK15: cond.end12: +// CHECK15-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK15-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP39]], 
i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// 
CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK15-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK15-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK15-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 +// CHECK15-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], 
i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt 
i32 [[TMP3]], 9 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK15-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK15-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK15: cond.true5: +// CHECK15-NEXT: br label [[COND_END7:%.*]] +// CHECK15: cond.false6: +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END7]] +// CHECK15: cond.end7: +// CHECK15-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK15-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: 
+// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: 
omp.inner.for.inc: +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK15-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// 
CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 
+// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK15-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK15-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK15-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// 
CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK15: cond.true6: +// CHECK15-NEXT: br label [[COND_END8:%.*]] +// CHECK15: cond.false7: +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END8]] +// CHECK15: cond.end8: +// CHECK15-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK15-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK15-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// 
CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK15-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 +// CHECK15-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// 
CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[TMP:%.*]] = alloca 
i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// 
CHECK15-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK15-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: land.lhs.true: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK15-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: br i1 
[[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK15-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK15-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK15-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK15-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* +// CHECK15-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK15-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK15-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK15-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK15-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK15-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK15-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK15-NEXT: [[ADD16:%.*]] = add nsw 
i64 [[TMP38]], [[TMP39]] +// CHECK15-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK15-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK15: cond.true18: +// CHECK15-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: br label [[COND_END20:%.*]] +// CHECK15: cond.false19: +// CHECK15-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: br label [[COND_END20]] +// CHECK15: cond.end20: +// CHECK15-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK15-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK15-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK15-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 
+// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK15-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK15-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK15-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK15-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: land.lhs.true: +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK15-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK15-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK15-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 +// CHECK15-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// 
CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK15-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK15-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK15-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK15-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK15-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK15-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK15-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK15-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK15-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK15-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: 
[[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK15-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK15-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK15-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK15-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK15-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK15-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK15-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK15-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK15-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK15-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK15-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK15-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK15-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK15-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK15-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], 
[[TMP26]] +// CHECK15-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 +// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK15: .execute: +// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP2]], 
i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK15-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK15: .omp.deinit: +// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK15-NEXT: br label [[DOTEXIT:%.*]] +// CHECK15: .exit: +// CHECK15-NEXT: ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], 
i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 
[[NVPTX_NUM_THREADS]]) +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK15: cond.true: +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: br label [[COND_END:%.*]] +// CHECK15: cond.false: +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END]] +// CHECK15: cond.end: +// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK15-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// 
CHECK15-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK15-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK15-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK15-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK15-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK15-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK15-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK15-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK15-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* +// CHECK15-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP34:%.*]] = load i32, 
i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK15-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK15: cond.true10: +// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: br label [[COND_END12:%.*]] +// CHECK15: cond.false11: +// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: br label [[COND_END12]] +// CHECK15: cond.end12: +// CHECK15-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] +// CHECK15-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: 
ret void +// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK15-NEXT: entry: +// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK15-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** 
[[A_ADDR]], align 4 +// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK15: omp.precond.then: +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK15: omp.inner.for.cond: 
+// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK15-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK15: omp.inner.for.body: +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK15: omp.body.continue: +// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK15: omp.inner.for.inc: +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK15: omp.inner.for.end: +// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK15: omp.loop.exit: +// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 
[[TMP20]]) +// CHECK15-NEXT: br label [[OMP_PRECOND_END]] +// CHECK15: omp.precond.end: +// CHECK15-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 +// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void 
@__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store 
[1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK16-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK16-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 
1, i32 128) +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK16-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], 
align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* +// CHECK16-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK16-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK16-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK16-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK16-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK16-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK16-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK16-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* +// CHECK16-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 +// CHECK16-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK16-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 
[[TMP35]], [[TMP36]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK16-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK16: cond.true11: +// CHECK16-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: br label [[COND_END13:%.*]] +// CHECK16: cond.false12: +// CHECK16-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END13]] +// CHECK16: cond.end13: +// CHECK16-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] +// CHECK16-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK16-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK16-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK16-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK16: .omp.lastprivate.then: +// CHECK16-NEXT: [[TMP50:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP50]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK16: .omp.lastprivate.done: +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, 
align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 
[[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK16-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK16: omp.dispatch.cond: +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK16: omp.dispatch.body: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK16-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK16: omp.dispatch.inc: +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_UB]], align 4 +// CHECK16-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK16: omp.dispatch.end: +// CHECK16-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK16-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK16: .omp.lastprivate.then: +// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK16-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK16: .omp.lastprivate.done: +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label 
[[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK16-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP19:%.*]] = inttoptr 
i32 [[TMP14]] to i8* +// CHECK16-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK16-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK16-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK16-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK16-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK16-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP33:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK16-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK16: cond.true10: +// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: br label [[COND_END12:%.*]] +// CHECK16: cond.false11: +// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END12]] +// CHECK16: cond.end12: +// CHECK16-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK16-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) 
#[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: 
store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK16-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK16-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK16-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK16-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 +// CHECK16-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK16-NEXT: store 
i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[CMP1:%.*]] = icmp slt i32 
[[TMP6]], 10 +// CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK16-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK16-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 
+// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK16: cond.true5: +// CHECK16-NEXT: br label [[COND_END7:%.*]] +// CHECK16: cond.false6: +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END7]] +// CHECK16: cond.end7: +// CHECK16-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK16-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK16-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: 
omp.inner.for.cond: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK16-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x 
[10 x i32]]*, align 4 +// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: 
[[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK16-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK16-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK16-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK16-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK16-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK16-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK16-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK16: cond.true6: +// CHECK16-NEXT: br label [[COND_END8:%.*]] +// CHECK16: 
cond.false7: +// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END8]] +// CHECK16: cond.end8: +// CHECK16-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK16-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: 
[[J:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK16-NEXT: br i1 
[[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK16-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK16-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK16-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK16-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK16-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK16-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: 
omp.inner.for.inc: +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 +// CHECK16-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// 
CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK16-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK16-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK16-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK16-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK16-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: land.lhs.true: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i64 0, i64* 
[[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK16-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK16-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP16:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK16-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK16-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK16-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* +// CHECK16-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK16-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK16-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK16-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK16-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK16-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK16-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK16-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK16-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// 
CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK16-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK16-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK16-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK16-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK16: cond.true18: +// CHECK16-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: br label [[COND_END20:%.*]] +// CHECK16: cond.false19: +// CHECK16-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK16-NEXT: br label [[COND_END20]] +// CHECK16: cond.end20: +// CHECK16-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK16-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], 
align 8 +// CHECK16-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK16-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK16-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK16-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK16-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK16-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt 
i32 0, [[TMP5]] +// CHECK16-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: land.lhs.true: +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK16-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK16-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK16-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 +// CHECK16-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK16-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK16-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK16-NEXT: [[CMP14:%.*]] = icmp sle i64 
[[TMP13]], [[CONV13]] +// CHECK16-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK16-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK16-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK16-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK16-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK16-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK16-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK16-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK16-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK16-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK16-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK16-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK16-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK16-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK16-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK16-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK16-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK16-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK16-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 
+// CHECK16-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK16-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK16-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK16-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK16-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK16-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 +// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: 
[[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK16: .execute: +// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK16-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK16: .omp.deinit: +// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK16-NEXT: br label [[DOTEXIT:%.*]] +// CHECK16: .exit: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: 
+// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 0, i32* 
[[I]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK16-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK16: cond.true: +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: br label [[COND_END:%.*]] +// CHECK16: cond.false: +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END]] +// CHECK16: cond.end: +// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP11]], 
i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK16-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK16-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK16-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK16-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK16-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK16-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK16-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK16-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK16-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK16-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK16-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK16-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x 
i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK16-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* +// CHECK16-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK16-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK16-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK16-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK16: cond.true10: +// CHECK16-NEXT: [[TMP40:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: br label [[COND_END12:%.*]] +// CHECK16: cond.false11: +// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: br label [[COND_END12]] +// CHECK16: cond.end12: +// CHECK16-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] +// CHECK16-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK16-NEXT: entry: +// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: 
[[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK16: omp.precond.then: +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 
4 +// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK16: omp.inner.for.cond: +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK16-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK16: omp.inner.for.body: +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK16-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK16: omp.body.continue: +// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK16: omp.inner.for.inc: +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK16: omp.inner.for.end: +// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK16: omp.loop.exit: +// CHECK16-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK16-NEXT: br label [[OMP_PRECOND_END]] +// CHECK16: omp.precond.end: +// CHECK16-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: 
store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK14-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK14: omp.body.continue: -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// 
CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// 
CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store 
i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 
+// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK1: cond.true14: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: br label [[COND_END16:%.*]] +// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END16]] +// CHECK1: cond.end16: +// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] +// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1: .omp.lastprivate.then: +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK1: .omp.lastprivate.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 
[[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1: omp.dispatch.cond: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1: omp.dispatch.body: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: 
omp.inner.for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1: omp.dispatch.inc: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: 
[[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1: .omp.lastprivate.then: +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK1: .omp.lastprivate.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK14-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// 
CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK14: .execute: -// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK14: .omp.deinit: -// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK14-NEXT: br label [[DOTEXIT:%.*]] -// CHECK14: .exit: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label 
[[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 
-// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK14-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK14: cond.true: -// CHECK14-NEXT: br label [[COND_END:%.*]] -// CHECK14: cond.false: -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END]] -// CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// 
CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK14-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK14-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK14-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK14-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK14-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK14-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** 
[[TMP19]], i32 4) -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK14-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK14-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK14: cond.true6: -// CHECK14-NEXT: br label [[COND_END8:%.*]] -// CHECK14: cond.false7: -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END8]] -// CHECK14: cond.end8: -// CHECK14-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK14-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// 
CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: 
[[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: 
store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x 
i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], 
align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// 
CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK14-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK14-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK14-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK14-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK14-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK14-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK14-NEXT: [[ARRAYIDX9:%.*]] = 
getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 -// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK14: omp.body.continue: -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// 
CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 +// CHECK1-NEXT: [[CONV10:%.*]] = 
trunc i32 [[ADD9]] to i16 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK14-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// 
CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK14: .execute: -// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK14: .omp.deinit: -// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK14-NEXT: br label [[DOTEXIT:%.*]] -// CHECK14: .exit: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK1-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__4(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I9:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* 
[[N_ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK14-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK14-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK14-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK14-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK14: land.lhs.true: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.then: -// CHECK14-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], 
align 8 -// CHECK14-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK14-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK14-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK14: cond.true: -// CHECK14-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: br label [[COND_END:%.*]] -// CHECK14: cond.false: -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: br label [[COND_END]] -// CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK14-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK14-NEXT: br i1 
[[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK14-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* -// CHECK14-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK14-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK14-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK14-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK14-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK14-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK14-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK14-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* 
@__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK14-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK14-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] -// CHECK14-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] -// CHECK14-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK14: cond.true18: -// CHECK14-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: br label [[COND_END20:%.*]] -// CHECK14: cond.false19: -// CHECK14-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: br label [[COND_END20]] -// CHECK14: cond.end20: -// CHECK14-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] -// CHECK14-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK14-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK14-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK14-NEXT: br label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.end: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: 
store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// 
CHECK1: cond.true5: +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false6: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J:%.*]] = alloca i32, align 
4 -// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK14-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK14-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK14-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK14-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: store i32 0, 
i32* [[J]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK14-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK14: land.lhs.true: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.then: -// CHECK14-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK14-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK14-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK14-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 -// CHECK14-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 -// CHECK14-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK14-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK14-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK14-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] -// CHECK14-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK14-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK14-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK14-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK14-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] -// CHECK14-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK14-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK14-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK14-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK14-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK14-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK14-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK14-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK14-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK14-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK14-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK14-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK14-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] -// CHECK14-NEXT: [[MUL33:%.*]] = mul 
nsw i64 [[SUB32]], 1 -// CHECK14-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK14-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK14-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK14-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK14-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] -// CHECK14-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 -// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK14: omp.body.continue: -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK14-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK14-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK14-NEXT: br label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.end: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], 
align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add 
nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK14-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK14-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK14: .execute: -// CHECK14-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* 
[[DOTTHREADID_TEMP_]], align 4 -// CHECK14-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK14-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK14: .omp.deinit: -// CHECK14-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK14-NEXT: br label [[DOTEXIT:%.*]] -// CHECK14: .exit: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK1-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], 
align 4 +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** 
[[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] 
= zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK1: cond.true7: +// CHECK1-NEXT: br label [[COND_END9:%.*]] +// CHECK1: cond.false8: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] +// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { 
+// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 
[[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], 
i32* [[J]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* 
nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// 
CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK14: omp.precond.then: -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK14-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK14-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK14: cond.true: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: br label [[COND_END:%.*]] -// CHECK14: cond.false: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END]] -// CHECK14: cond.end: -// CHECK14-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK14-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK14-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK14-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK14-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK14-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK14-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK14-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK14-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK14-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// 
CHECK14-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK14-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK14-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK14-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK14-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -// CHECK14-NEXT: br i1 [[CMP9]], label 
[[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK14: cond.true10: -// CHECK14-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: br label [[COND_END12:%.*]] -// CHECK14: cond.false11: -// CHECK14-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: br label [[COND_END12]] -// CHECK14: cond.end12: -// CHECK14-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] -// CHECK14-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) -// CHECK14-NEXT: br label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.end: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52 +// CHECK1-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* 
[[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 +// 
CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: land.lhs.true: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[CONV12:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV12]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// 
CHECK1: cond.false: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK1-NEXT: [[CMP14:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV15:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[CONV15]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: 
[[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP31]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP32]], [[TMP33]] +// CHECK1-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD18:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i64 [[ADD18]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: [[CMP19:%.*]] = icmp sgt i64 [[TMP38]], [[TMP39]] 
+// CHECK1-NEXT: br i1 [[CMP19]], label [[COND_TRUE20:%.*]], label [[COND_FALSE21:%.*]] +// CHECK1: cond.true20: +// CHECK1-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: br label [[COND_END22:%.*]] +// CHECK1: cond.false21: +// CHECK1-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: br label [[COND_END22]] +// CHECK1: cond.end22: +// CHECK1-NEXT: [[COND23:%.*]] = phi i64 [ [[TMP40]], [[COND_TRUE20]] ], [ [[TMP41]], [[COND_FALSE21]] ] +// CHECK1-NEXT: store i64 [[COND23]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP42]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK14-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK14-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK14-NEXT: entry: -// CHECK14-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK14-NEXT: [[V_ADDR:%.*]] = 
alloca i32*, align 4 -// CHECK14-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK14-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK14-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK14-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK14-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK14-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK14-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK14-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK14: 
omp.precond.then: -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK14-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK14-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK14-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK14-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK14-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK14-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK14-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 -// CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK14: omp.body.continue: -// CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK14: omp.inner.for.end: -// CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK14: omp.loop.exit: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK14-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK14-NEXT: br label [[OMP_PRECOND_END]] -// CHECK14: omp.precond.end: -// CHECK14-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// 
CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I10:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J11:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: 
[[CONV4:%.*]] = sext i32 [[DIV]] to i64 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB5:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK1-NEXT: [[DIV6:%.*]] = sdiv i32 [[SUB5]], 1 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[DIV6]] to i64 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], [[CONV7]] +// CHECK1-NEXT: [[SUB8:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK1-NEXT: store i64 [[SUB8]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK1-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: land.lhs.true: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[CMP9:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* 
[[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK1-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp ule i64 [[TMP13]], [[TMP14]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB13:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK1-NEXT: [[DIV14:%.*]] = sdiv i32 [[SUB13]], 1 +// CHECK1-NEXT: [[MUL15:%.*]] = mul nsw i32 1, [[DIV14]] +// CHECK1-NEXT: [[CONV16:%.*]] = sext i32 [[MUL15]] to i64 +// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i64 [[TMP15]], [[CONV16]] +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL18]] +// CHECK1-NEXT: [[CONV19:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK1-NEXT: store i32 [[CONV19]], i32* [[I10]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB20:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK1-NEXT: [[DIV21:%.*]] = sdiv i32 [[SUB20]], 1 +// CHECK1-NEXT: [[MUL22:%.*]] = mul nsw i32 1, [[DIV21]] +// CHECK1-NEXT: [[CONV23:%.*]] = sext i32 [[MUL22]] to i64 +// CHECK1-NEXT: [[DIV24:%.*]] = sdiv i64 [[TMP18]], [[CONV23]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: [[SUB25:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK1-NEXT: 
[[DIV26:%.*]] = sdiv i32 [[SUB25]], 1 +// CHECK1-NEXT: [[MUL27:%.*]] = mul nsw i32 1, [[DIV26]] +// CHECK1-NEXT: [[CONV28:%.*]] = sext i32 [[MUL27]] to i64 +// CHECK1-NEXT: [[MUL29:%.*]] = mul nsw i64 [[DIV24]], [[CONV28]] +// CHECK1-NEXT: [[SUB30:%.*]] = sub nsw i64 [[TMP17]], [[MUL29]] +// CHECK1-NEXT: [[MUL31:%.*]] = mul nsw i64 [[SUB30]], 1 +// CHECK1-NEXT: [[ADD32:%.*]] = add nsw i64 0, [[MUL31]] +// CHECK1-NEXT: [[CONV33:%.*]] = trunc i64 [[ADD32]] to i32 +// CHECK1-NEXT: store i32 [[CONV33]], i32* [[J11]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 +// CHECK1-NEXT: [[ADD34:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 +// CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM35]] +// CHECK1-NEXT: store i32 [[ADD34]], i32* [[ARRAYIDX36]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK1-NEXT: [[ADD37:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK1-NEXT: store i64 [[ADD37]], i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: 
[[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[L_CASTED]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK15: .omp.deinit: -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// 
CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], 
i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// 
CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* +// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** 
[[A_ADDR]], align 8 +// CHECK1-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* 
[[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 
[[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK15-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK15-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK15-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = 
load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* -// CHECK15-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK15-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK15-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK15-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK15-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK15-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK15-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK15-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) -// CHECK15-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: br label [[COND_END13:%.*]] -// CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END13]] -// CHECK15: cond.end13: -// CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] -// CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) -// CHECK15-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK15-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK15: .omp.lastprivate.then: -// CHECK15-NEXT: [[TMP50:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP50]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK15: .omp.lastprivate.done: -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = 
load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// 
CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK2-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: 
store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: 
+// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x 
i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK2-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* +// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// 
CHECK2-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK2: cond.true14: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END16:%.*]] +// CHECK2: cond.false15: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END16]] +// CHECK2: cond.end16: +// CHECK2-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] +// CHECK2-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK2-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2: .omp.lastprivate.then: +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP50]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK2: .omp.lastprivate.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], 
i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK2-NEXT: [[TMP1:%.*]] = load 
i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2: omp.dispatch.cond: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: 
[[CONV7:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2: omp.dispatch.body: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 
4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2: omp.dispatch.inc: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2: omp.dispatch.end: +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], 
label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2: .omp.lastprivate.then: +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV1]], align 8 +// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK2: .omp.lastprivate.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: 
store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// 
CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK15-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK15: omp.dispatch.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK15: omp.dispatch.body: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// 
CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK15-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK15: omp.dispatch.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK15: omp.dispatch.end: -// CHECK15-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK15-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK15-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK15: .omp.lastprivate.then: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 -// CHECK15-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK15: .omp.lastprivate.done: -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] 
= bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// 
CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK15: .omp.deinit: -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// 
CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], 
i64 0, i64 1 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD9]], 
i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK2: cond.true11: +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END13:%.*]] +// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END13]] +// CHECK2: cond.end13: +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, 
align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp 
slt i32 [[TMP12]], [[ADD]] -// CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK15-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK15-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK15-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK15-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK15-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) -// CHECK15-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK15-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK15: cond.true10: -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: br label [[COND_END12:%.*]] -// CHECK15: cond.false11: -// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END12]] -// CHECK15: cond.end12: -// CHECK15-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK15-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: call void 
@__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 +// CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 +// CHECK2-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: 
store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// 
CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK15-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK15-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK15-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK2-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: br i1 
[[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] 
to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK2: cond.true5: +// CHECK2-NEXT: br label [[COND_END7:%.*]] +// CHECK2: cond.false6: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END7]] +// CHECK2: cond.end7: +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: 
omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] 
to i32 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: 
store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK15-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// 
CHECK15: .omp.deinit: -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK2-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: 
call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, 
i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK15-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK15-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// 
CHECK15-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK15: cond.true5: -// CHECK15-NEXT: br label [[COND_END7:%.*]] -// CHECK15: cond.false6: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END7]] -// CHECK15: cond.end7: -// CHECK15-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK15-NEXT: store i32 [[COND8]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK2-NEXT: 
[[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 +// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK2: cond.true7: +// CHECK2-NEXT: br label [[COND_END9:%.*]] +// CHECK2: cond.false8: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END9]] +// CHECK2: cond.end9: +// CHECK2-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] +// CHECK2-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull 
align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// 
CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 
+// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK2-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] +// CHECK2-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 +// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] +// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: 
br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK15-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK15: .omp.deinit: -// CHECK15-NEXT: call void 
@__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52 +// CHECK2-SAME: (i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) 
+// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I8:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J9:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x 
i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 +// CHECK2-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: land.lhs.true: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 +// CHECK2-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 +// CHECK2-NEXT: [[TMP21:%.*]] = 
load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV12:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP21]], i32* [[CONV12]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP18]] to i8* +// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP20]] to i8* +// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP22]] to i8* +// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i64 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: 
store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: [[CMP16:%.*]] = icmp sgt i32 [[TMP40]], [[TMP41]] +// CHECK2-NEXT: br i1 [[CMP16]], label [[COND_TRUE17:%.*]], label [[COND_FALSE18:%.*]] +// CHECK2: cond.true17: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: br label [[COND_END19:%.*]] +// CHECK2: cond.false18: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END19]] +// CHECK2: cond.end19: +// CHECK2-NEXT: [[COND20:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE17]] ], [ [[TMP43]], [[COND_FALSE18]] ] +// CHECK2-NEXT: store i32 [[COND20]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK2-NEXT: br label 
[[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK15-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x 
i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK15-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK15-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK15-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK15-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK15-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK15-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK15-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK15: cond.true6: -// CHECK15-NEXT: br label [[COND_END8:%.*]] -// CHECK15: cond.false7: -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END8]] -// CHECK15: cond.end8: -// CHECK15-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK15-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] 
= alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I10:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J11:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 
[[TMP4]], 0 +// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], [[DIV5]] +// CHECK2-NEXT: [[SUB6:%.*]] = sub nsw i32 [[MUL]], 1 +// CHECK2-NEXT: store i32 [[SUB6]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK2-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: land.lhs.true: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV8:%.*]] = trunc i64 [[TMP8]] to i32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[TMP9]] to i32 +// CHECK2-NEXT: store i32 [[CONV8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, 
i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV12:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP13:%.*]] = icmp ule i64 [[CONV12]], [[TMP14]] +// CHECK2-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i32 1, [[DIV15]] +// CHECK2-NEXT: [[DIV17:%.*]] = sdiv i32 [[TMP15]], [[MUL16]] +// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL18]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I10]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK2-NEXT: [[DIV20:%.*]] = sdiv i32 [[SUB19]], 1 +// CHECK2-NEXT: [[MUL21:%.*]] = mul nsw i32 1, [[DIV20]] +// CHECK2-NEXT: [[DIV22:%.*]] = sdiv i32 [[TMP18]], [[MUL21]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[SUB23:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK2-NEXT: [[DIV24:%.*]] = sdiv i32 [[SUB23]], 1 +// CHECK2-NEXT: [[MUL25:%.*]] = mul nsw i32 1, [[DIV24]] +// CHECK2-NEXT: [[MUL26:%.*]] = mul nsw i32 [[DIV22]], [[MUL25]] +// CHECK2-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP17]], [[MUL26]] +// 
CHECK2-NEXT: [[MUL28:%.*]] = mul nsw i32 [[SUB27]], 1 +// CHECK2-NEXT: [[ADD29:%.*]] = add nsw i32 0, [[MUL28]] +// CHECK2-NEXT: store i32 [[ADD29]], i32* [[J11]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[I10]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[J11]], align 4 +// CHECK2-NEXT: [[ADD30:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[I10]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP23]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[J11]], align 4 +// CHECK2-NEXT: [[IDXPROM31:%.*]] = sext i32 [[TMP24]] to i64 +// CHECK2-NEXT: [[ARRAYIDX32:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM31]] +// CHECK2-NEXT: store i32 [[ADD30]], i32* [[ARRAYIDX32]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD33:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] +// CHECK2-NEXT: store i32 [[ADD33]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK15-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// 
CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK15-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK15-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// 
CHECK15-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK15-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK15-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK15-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// 
CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// 
CHECK15-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK15: .omp.deinit: -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) 
#[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK2-NEXT: [[TMP22:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK2-NEXT: [[TMP24:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK2-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK2-NEXT: [[TMP26:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK2-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 8 +// CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// 
CHECK2-NEXT: [[TMP28:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 +// CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK2-NEXT: [[TMP30:%.*]] = bitcast i32* [[TMP20]] to i8* +// CHECK2-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP33]], i64 5) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP40]], 
[[TMP41]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK2: cond.true11: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END13:%.*]] +// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END13]] +// CHECK2: cond.end13: +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP42]], [[COND_TRUE11]] ], [ [[TMP43]], [[COND_FALSE12]] ] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP44]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 
-// CHECK15-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I9:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK15-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK15-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK15-NEXT: 
[[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK15-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK15-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: land.lhs.true: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK15-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK15-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK15-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK15-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* -// CHECK15-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK15-NEXT: store i8* [[TMP26]], 
i8** [[TMP25]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK15-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK15-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK15-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK15-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK15-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] -// CHECK15-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP40:%.*]] = load i64, 
i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] -// CHECK15-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK15: cond.true18: -// CHECK15-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: br label [[COND_END20:%.*]] -// CHECK15: cond.false19: -// CHECK15-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: br label [[COND_END20]] -// CHECK15: cond.end20: -// CHECK15-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] -// CHECK15-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK15-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK15-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK2-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 8 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// 
CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP14]] to i64 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i64 [[IDXPROM]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK2-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM8]] +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 
[[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK15-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK15-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK15-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK15-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK15-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: land.lhs.true: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK15-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK15-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// 
CHECK15-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK15-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 -// CHECK15-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 -// CHECK15-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK15-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK15-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK15-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] -// CHECK15-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK15-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK15-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK15-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK15-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] -// CHECK15-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK15-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK15-NEXT: store i32 [[CONV21]], i32* 
[[I11]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK15-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK15-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK15-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK15-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK15-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK15-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK15-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK15-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK15-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK15-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] -// CHECK15-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK15-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK15-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK15-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK15-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK15-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] -// CHECK15-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK15-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK15-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void 
@__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], 
align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void 
@__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK3: cond.true11: +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: br label [[COND_END13:%.*]] +// CHECK3: cond.false12: +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END13]] +// CHECK3: cond.end13: +// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3: .omp.lastprivate.then: +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP48]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK3: .omp.lastprivate.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK3-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK15-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store 
i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK15-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK15: .execute: -// CHECK15-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK15-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK15-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK15: .omp.deinit: -// CHECK15-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK15-NEXT: br label [[DOTEXIT:%.*]] -// CHECK15: .exit: -// CHECK15-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 
4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3: omp.dispatch.cond: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3: omp.dispatch.body: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3: omp.dispatch.inc: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 
[[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3: omp.dispatch.end: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3: .omp.lastprivate.then: +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK3: .omp.lastprivate.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// 
CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK15-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK15-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK15: cond.true: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: br label [[COND_END:%.*]] -// CHECK15: cond.false: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END]] -// CHECK15: cond.end: -// CHECK15-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK15-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK15-NEXT: br i1 [[CMP5]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK15-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK15-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK15-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK15-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK15-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK15-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK15-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK15-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK15-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK15-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK15-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// 
CHECK15-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -// CHECK15-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK15: cond.true10: -// CHECK15-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: br label [[COND_END12:%.*]] -// CHECK15: cond.false11: -// CHECK15-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: br label [[COND_END12]] -// CHECK15: cond.end12: -// CHECK15-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] -// CHECK15-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 
4 -// CHECK15-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK15-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK15-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK15-NEXT: entry: -// CHECK15-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK15-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK15-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK15-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK15-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK15-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK15-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK15-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK15-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK15-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK15: omp.precond.then: -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK15-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK15-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK15-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK15-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 -// CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK15: omp.body.continue: -// CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 
[[TMP17]], [[TMP18]] -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK15: omp.inner.for.end: -// CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK15: omp.loop.exit: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK15-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK15-NEXT: br label [[OMP_PRECOND_END]] -// CHECK15: omp.precond.end: -// CHECK15-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// 
CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], 
[[TMP8]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK3-NEXT: store i8* 
[[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l43 -// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// 
CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, 
i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] 
+// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = 
alloca [1000 x i32]*, align 4 -// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK16-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK16-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: 
[[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK16-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK16-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* -// CHECK16-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK16-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK16-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK16-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, 
i32 3 -// CHECK16-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK16-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK16-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK16-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK16-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK16-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[CMP10:%.*]] 
= icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK16-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK16: cond.true11: -// CHECK16-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: br label [[COND_END13:%.*]] -// CHECK16: cond.false12: -// CHECK16-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END13]] -// CHECK16: cond.end13: -// CHECK16-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] -// CHECK16-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) -// CHECK16-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK16-NEXT: br i1 [[TMP49]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK16: .omp.lastprivate.then: -// CHECK16-NEXT: [[TMP50:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP50]], i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK16: .omp.lastprivate.done: -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK3-SAME: ([10 x i32]* 
nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK3: cond.true5: +// CHECK3-NEXT: br label [[COND_END7:%.*]] +// CHECK3: cond.false6: +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END7]] +// CHECK3: cond.end7: +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 
[[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK16-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] 
= load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK16-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK16: omp.dispatch.cond: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: 
cond.true: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK16: omp.dispatch.body: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK16-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK16-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 -// 
CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK16: omp.dispatch.inc: -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK16: omp.dispatch.end: -// CHECK16-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK16-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK16: .omp.lastprivate.then: -// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 -// CHECK16-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK16: .omp.lastprivate.done: -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: 
-// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK3-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// 
CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = 
alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 
+// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* 
[[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52 +// CHECK3-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 
+// CHECK3-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, 
[1000 x i16]** [[AA_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK16-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK16-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* 
[[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK16-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK16-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK16-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// 
CHECK16-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK16-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK16: cond.true10: -// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: br label [[COND_END12:%.*]] -// CHECK16: cond.false11: -// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END12]] -// CHECK16: cond.end12: -// CHECK16-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK16-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// 
CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], 
[[CONV6]] +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: land.lhs.true: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK3-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK3-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* +// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// 
CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: 
[[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK3-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK3: cond.true18: +// CHECK3-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: br label [[COND_END20:%.*]] +// CHECK3: cond.false19: +// CHECK3-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: br label [[COND_END20]] +// CHECK3: cond.end20: +// CHECK3-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK3-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK3-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK3-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK3-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK3-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK3-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: land.lhs.true: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK3-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 
[[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK3-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK3-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK3-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 +// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK3-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK3-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK3-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK3-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK3-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK3-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK3-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK3-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK3-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK3-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK3-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK3-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK3-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK3-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK3-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK3-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK3-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK3-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK3-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK3-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK3-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK3-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK3-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK3-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, 
i32* [[TMP27]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** 
[[AA_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 
-// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK16-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK16-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK16-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK16-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK16-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK3-NEXT: br label 
[[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load 
i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// 
CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP24:%.*]] = inttoptr i32 
[[TMP17]] to i8* +// CHECK3-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* +// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: 
[[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] 
= load i32*, i32** [[V_ADDR]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK16-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** 
[[B_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l28 +// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call 
void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: 
[[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK16-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK16-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 
[[TMP19]] -// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK16: cond.true5: -// CHECK16-NEXT: br label [[COND_END7:%.*]] -// CHECK16: cond.false6: -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END7]] -// CHECK16: cond.end7: -// CHECK16-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK16-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i32, 
align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] 
+// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// 
CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK4-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK4-NEXT: store i8* [[TMP29]], i8** 
[[TMP28]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK4-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK4: cond.true11: +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: br label [[COND_END13:%.*]] +// CHECK4: cond.false12: +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK4-NEXT: br label [[COND_END13]] +// CHECK4: cond.end13: +// CHECK4-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK4-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK4-NEXT: br i1 [[TMP47]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4: .omp.lastprivate.then: +// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP48]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK4: .omp.lastprivate.done: +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// 
CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK4-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK4: omp.dispatch.cond: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK4-NEXT: store 
i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK4: omp.dispatch.body: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK4-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: 
br label [[OMP_DISPATCH_INC:%.*]] +// CHECK4: omp.dispatch.inc: +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK4: omp.dispatch.end: +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK4-NEXT: br i1 [[TMP29]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK4: .omp.lastprivate.then: +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP30]], i32* [[L_ADDR]], align 4 +// CHECK4-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK4: .omp.lastprivate.done: +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// 
CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, 
i32 1) -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l34 +// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 -// CHECK16-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { 
-// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK4-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK4-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, 
i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK4: cond.true10: +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END12:%.*]] +// CHECK4: cond.false11: +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END12]] +// CHECK4: cond.end12: +// CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK4-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* 
[[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], 
i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK4-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: 
[[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: 
store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK16-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK16-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK16-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK16-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK16-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK16-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK16-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK16-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK16: cond.true6: -// CHECK16-NEXT: br label [[COND_END8:%.*]] -// CHECK16: cond.false7: -// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END8]] -// CHECK16: cond.end8: -// CHECK16-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK16-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l39 +// CHECK4-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store [10 x 
i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK16-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK16-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK16-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK16-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK16-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK16-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* 
[[ARRAYIDX9]], align 4 -// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 
+// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK4-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK4: cond.true5: +// CHECK4-NEXT: br label [[COND_END7:%.*]] +// CHECK4: cond.false6: +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END7]] +// CHECK4: cond.end7: +// CHECK4-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK4-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l67 -// CHECK16-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* 
[[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add 
nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK4-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__8 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// 
CHECK16-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I9:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J10:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK16-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK16-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK16-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK16-NEXT: store i64 
[[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK16-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: land.lhs.true: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) -// CHECK16-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: br label [[COND_END:%.*]] 
-// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i64 [[COND]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 -// CHECK16-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] -// CHECK16-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 -// CHECK16-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* -// CHECK16-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK16-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = getelementptr 
inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK16-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK16-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK16-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK16-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK16-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] -// CHECK16-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] -// CHECK16-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] -// CHECK16-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP41:%.*]] = load 
i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] -// CHECK16-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] -// CHECK16: cond.true18: -// CHECK16-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: br label [[COND_END20:%.*]] -// CHECK16: cond.false19: -// CHECK16-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: br label [[COND_END20]] -// CHECK16: cond.end20: -// CHECK16-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] -// CHECK16-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 -// CHECK16-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 -// CHECK16-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK4-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* 
[[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: 
[[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I11:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[J12:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 -// 
CHECK16-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 -// CHECK16-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] -// CHECK16-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 -// CHECK16-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[J]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK16-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: land.lhs.true: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] -// CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 -// CHECK16-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 -// CHECK16-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK16-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 -// CHECK16-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 -// CHECK16-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* 
[[DOTOMP_STRIDE]], i64 1, i64 1) -// CHECK16-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 -// CHECK16-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK16-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] -// CHECK16-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP16]], 0 -// CHECK16-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK16-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] -// CHECK16-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 -// CHECK16-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] -// CHECK16-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] -// CHECK16-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK16-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 -// CHECK16-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 -// CHECK16-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] -// CHECK16-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 -// CHECK16-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK16-NEXT: 
[[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 -// CHECK16-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 -// CHECK16-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] -// CHECK16-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 -// CHECK16-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] -// CHECK16-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] -// CHECK16-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 -// CHECK16-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] -// CHECK16-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 -// CHECK16-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK16-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 -// CHECK16-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] -// CHECK16-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 -// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 -// CHECK16-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] -// CHECK16-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], 
align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK4-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 
+// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK4-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK4-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK4-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK4-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK4: cond.true6: +// CHECK4-NEXT: br label [[COND_END8:%.*]] +// CHECK4: cond.false7: +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END8]] +// CHECK4: cond.end8: +// CHECK4-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK4-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 
+// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* 
[[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK4-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK4-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK4-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK4-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 
x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l52 +// CHECK4-SAME: (i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void 
@__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__8(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [10 x [10 x i32]]* [[TMP0]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l74 -// CHECK16-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) -// CHECK16-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK16: .execute: -// CHECK16-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK16-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] -// CHECK16-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK16: .omp.deinit: -// CHECK16-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK16-NEXT: br label [[DOTEXIT:%.*]] -// CHECK16: .exit: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__8 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: 
[[DOTOMP_COMB_UB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I9:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J10:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK4-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK4-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK4-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, 
[[TMP5]] +// CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: land.lhs.true: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: br i1 [[CMP8]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i64 0, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[CONV11:%.*]] = zext i32 [[NVPTX_NUM_THREADS]] to i64 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP8]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB1]], i32 [[TMP9]], i32 91, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_COMB_LB]], i64* [[DOTOMP_COMB_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV11]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: [[CMP12:%.*]] = icmp sgt i64 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP12]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i64 [ [[TMP12]], [[COND_TRUE]] ], [ [[TMP13]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i64 
[[COND]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: store i64 [[TMP14]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP16]], 1 +// CHECK4-NEXT: [[CMP13:%.*]] = icmp slt i64 [[TMP15]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP13]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = trunc i64 [[TMP17]] to i32 +// CHECK4-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP20:%.*]] = trunc i64 [[TMP19]] to i32 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP18]] to i8* +// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP20]] to i8* +// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP22]] to i8* +// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// 
CHECK4-NEXT: [[TMP30:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP31]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP32]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [10 x [10 x i32]]*)* @__omp_outlined__9 to i8*), i8* null, i8** [[TMP33]], i32 4) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP35:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: [[ADD14:%.*]] = add nsw i64 [[TMP34]], [[TMP35]] +// CHECK4-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: [[TMP37:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: [[ADD15:%.*]] = add nsw i64 [[TMP36]], [[TMP37]] +// CHECK4-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: [[TMP38:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP39:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: [[ADD16:%.*]] = add nsw i64 [[TMP38]], [[TMP39]] +// CHECK4-NEXT: store i64 [[ADD16]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP40:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP41:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: [[CMP17:%.*]] = icmp sgt i64 [[TMP40]], [[TMP41]] +// CHECK4-NEXT: br i1 [[CMP17]], label [[COND_TRUE18:%.*]], label [[COND_FALSE19:%.*]] +// CHECK4: cond.true18: +// CHECK4-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: br label 
[[COND_END20:%.*]] +// CHECK4: cond.false19: +// CHECK4-NEXT: [[TMP43:%.*]] = load i64, i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: br label [[COND_END20]] +// CHECK4: cond.end20: +// CHECK4-NEXT: [[COND21:%.*]] = phi i64 [ [[TMP42]], [[COND_TRUE18]] ], [ [[TMP43]], [[COND_FALSE19]] ] +// CHECK4-NEXT: store i64 [[COND21]], i64* [[DOTOMP_COMB_UB]], align 8 +// CHECK4-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTOMP_COMB_LB]], align 8 +// CHECK4-NEXT: store i64 [[TMP44]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP45:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP45]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP46]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_COMB_UB:%.*]] 
= alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK16-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// 
CHECK16-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK16-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK16: cond.true: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: br label [[COND_END:%.*]] -// CHECK16: cond.false: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END]] -// CHECK16: cond.end: -// CHECK16-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK16-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK16-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK16-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK16-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK16-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK16-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK16-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK16-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK16-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK16-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* -// CHECK16-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK16-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 -// CHECK16-NEXT: [[TMP31:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK16-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) -// CHECK16-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] -// CHECK16-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK16: cond.true10: -// CHECK16-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: br label [[COND_END12:%.*]] -// CHECK16: cond.false11: -// CHECK16-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: br label [[COND_END12]] -// CHECK16: cond.end12: -// CHECK16-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] -// CHECK16-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i64, align 8 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I11:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[J12:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP3]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[CONV:%.*]] = sext i32 [[DIV]] to i64 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB4:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK4-NEXT: [[DIV5:%.*]] = sdiv i32 [[SUB4]], 1 +// CHECK4-NEXT: [[CONV6:%.*]] = sext i32 [[DIV5]] to i64 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV]], [[CONV6]] +// CHECK4-NEXT: [[SUB7:%.*]] = sub nsw i64 [[MUL]], 1 +// CHECK4-NEXT: store i64 [[SUB7]], i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[J]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK4-NEXT: br i1 [[CMP]], label [[LAND_LHS_TRUE:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: land.lhs.true: +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[CMP8:%.*]] = icmp slt i32 0, [[TMP6]] +// CHECK4-NEXT: br i1 [[CMP8]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_3]], align 8 +// CHECK4-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_UB]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[CONV9:%.*]] = zext i32 [[TMP8]] to i64 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CONV10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK4-NEXT: store i64 [[CONV9]], i64* [[DOTOMP_LB]], align 8 +// CHECK4-NEXT: store i64 [[CONV10]], i64* [[DOTOMP_UB]], align 8 +// CHECK4-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_8(%struct.ident_t* @[[GLOB2]], i32 [[TMP11]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 1) +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8 +// CHECK4-NEXT: store i64 [[TMP12]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CONV13:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK4-NEXT: [[CMP14:%.*]] = icmp sle i64 [[TMP13]], [[CONV13]] +// CHECK4-NEXT: br i1 [[CMP14]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB15:%.*]] = sub 
nsw i32 [[TMP16]], 0 +// CHECK4-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK4-NEXT: [[MUL17:%.*]] = mul nsw i32 1, [[DIV16]] +// CHECK4-NEXT: [[CONV18:%.*]] = sext i32 [[MUL17]] to i64 +// CHECK4-NEXT: [[DIV19:%.*]] = sdiv i64 [[TMP15]], [[CONV18]] +// CHECK4-NEXT: [[MUL20:%.*]] = mul nsw i64 [[DIV19]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i64 0, [[MUL20]] +// CHECK4-NEXT: [[CONV21:%.*]] = trunc i64 [[ADD]] to i32 +// CHECK4-NEXT: store i32 [[CONV21]], i32* [[I11]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB22:%.*]] = sub nsw i32 [[TMP19]], 0 +// CHECK4-NEXT: [[DIV23:%.*]] = sdiv i32 [[SUB22]], 1 +// CHECK4-NEXT: [[MUL24:%.*]] = mul nsw i32 1, [[DIV23]] +// CHECK4-NEXT: [[CONV25:%.*]] = sext i32 [[MUL24]] to i64 +// CHECK4-NEXT: [[DIV26:%.*]] = sdiv i64 [[TMP18]], [[CONV25]] +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK4-NEXT: [[SUB27:%.*]] = sub nsw i32 [[TMP20]], 0 +// CHECK4-NEXT: [[DIV28:%.*]] = sdiv i32 [[SUB27]], 1 +// CHECK4-NEXT: [[MUL29:%.*]] = mul nsw i32 1, [[DIV28]] +// CHECK4-NEXT: [[CONV30:%.*]] = sext i32 [[MUL29]] to i64 +// CHECK4-NEXT: [[MUL31:%.*]] = mul nsw i64 [[DIV26]], [[CONV30]] +// CHECK4-NEXT: [[SUB32:%.*]] = sub nsw i64 [[TMP17]], [[MUL31]] +// CHECK4-NEXT: [[MUL33:%.*]] = mul nsw i64 [[SUB32]], 1 +// CHECK4-NEXT: [[ADD34:%.*]] = add nsw i64 0, [[MUL33]] +// CHECK4-NEXT: [[CONV35:%.*]] = trunc i64 [[ADD34]] to i32 +// CHECK4-NEXT: store i32 [[CONV35]], i32* [[J12]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK4-NEXT: [[ADD36:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[I11]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP23]] +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[J12]], align 4 +// CHECK4-NEXT: [[ARRAYIDX37:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP24]] +// CHECK4-NEXT: store i32 [[ADD36]], i32* [[ARRAYIDX37]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP26:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8 +// CHECK4-NEXT: [[ADD38:%.*]] = add nsw i64 [[TMP25]], [[TMP26]] +// CHECK4-NEXT: store i64 [[ADD38]], i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP27]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void // // -// CHECK16-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK16-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { -// CHECK16-NEXT: entry: -// CHECK16-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[A_ADDR:%.*]] = alloca [1000 x 
i32]*, align 4 -// CHECK16-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 -// CHECK16-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK16-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK16-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK16-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK16-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK16-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK16-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK16-NEXT: br i1 [[CMP]], label 
[[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK16: omp.precond.then: -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK16-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK16-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK16-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[I3]], align 4 -// CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK16-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK16-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 -// CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK16: omp.body.continue: -// CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]] -// CHECK16: omp.inner.for.end: -// CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK16: omp.loop.exit: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 -// CHECK16-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) -// CHECK16-NEXT: br label [[OMP_PRECOND_END]] -// CHECK16: omp.precond.end: -// CHECK16-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l59 +// CHECK4-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca 
i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK4-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK4: .execute: +// CHECK4-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__10(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32* [[TMP4]]) #[[ATTR2]] +// CHECK4-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK4: .omp.deinit: +// CHECK4-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[V_ADDR:%.*]] = 
alloca i32*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK4: cond.true: +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END:%.*]] +// CHECK4: cond.false: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END]] +// CHECK4: cond.end: +// CHECK4-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK4-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK4-NEXT: 
[[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP20:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK4-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP22:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK4-NEXT: store i8* [[TMP22]], i8** [[TMP21]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK4-NEXT: [[TMP24:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK4-NEXT: store i8* [[TMP24]], i8** [[TMP23]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK4-NEXT: [[TMP26:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK4-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK4-NEXT: [[TMP28:%.*]] = bitcast i32* [[TMP18]] to i8* +// CHECK4-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP29]], align 4 +// CHECK4-NEXT: [[TMP31:%.*]] = 
bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK4-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP30]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32*)* @__omp_outlined__11 to i8*), i8* null, i8** [[TMP31]], i32 5) +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP32]], [[TMP33]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP34]], [[TMP35]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP36]], [[TMP37]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP38]], [[TMP39]] +// CHECK4-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK4: cond.true10: +// CHECK4-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: br label [[COND_END12:%.*]] +// CHECK4: cond.false11: +// CHECK4-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: br label [[COND_END12]] +// CHECK4: cond.end12: +// CHECK4-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP40]], [[COND_TRUE10]] ], [ [[TMP41]], [[COND_FALSE11]] ] +// CHECK4-NEXT: store i32 [[COND13]], i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP42]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP43:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[TMP43]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__11 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32* [[V:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK4-NEXT: [[V_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: 
[[I3:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32* [[V]], i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK4-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK4-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK4-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK4-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK4-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK4: omp.precond.then: +// CHECK4-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: store 
i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK4-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK4: omp.inner.for.cond: +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK4-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK4: omp.inner.for.body: +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[V_ADDR]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP13]], i32 [[TMP14]] +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK4-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[ARRAYIDX5]], align 4 +// CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK4: omp.body.continue: +// CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK4: omp.inner.for.inc: +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]] +// CHECK4: omp.inner.for.end: +// CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK4: omp.loop.exit: +// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 +// CHECK4-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP20]]) +// CHECK4-NEXT: br label [[OMP_PRECOND_END]] +// CHECK4: omp.precond.end: +// CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp index f574cabb3332f..35816730ed9b0 100644 --- a/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/nvptx_target_teams_distribute_parallel_for_simd_codegen.cpp @@ -1,22 +1,10 @@ // NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --function-signature --include-generated-funcs --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" "reduction_size[.].+[.]" "pl_cond[.].+[.|,]" --prefix-filecheck-ir-name _ // Test target codegen - host bc file has to be created first. 
-// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-version=45 -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda 
-emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK6 - // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK7 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK8 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 // RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK9 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK10 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o 
- -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK11 -// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK12 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fopenmp-cuda-mode -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 // expected-no-diagnostics #ifndef HEADER @@ -71,3251 +59,6 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// 
CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x 
i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* 
@"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i64 0 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[L2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP22:%.*]] = zext i32 [[TMP21]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[CONV8]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* 
[[L_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[CONV9]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP28:%.*]] = inttoptr i64 [[TMP20]] to i8* -// CHECK1-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP30:%.*]] = inttoptr i64 [[TMP22]] to i8* -// CHECK1-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 8 -// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP32:%.*]] = inttoptr i64 [[TMP24]] to i8* -// CHECK1-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK1-NEXT: [[TMP36:%.*]] = inttoptr i64 [[TMP26]] to i8* -// CHECK1-NEXT: store i8* [[TMP36]], i8** [[TMP35]], align 8 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[TMP37]], align 4 -// CHECK1-NEXT: [[TMP39:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP38]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP39]], i64 5) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP44]], [[TMP45]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP46]], [[TMP47]] -// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK1: cond.true14: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK1-NEXT: br label [[COND_END16:%.*]] -// CHECK1: cond.false15: -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END16]] -// CHECK1: cond.end16: -// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP48]], [[COND_TRUE14]] ], [ [[TMP49]], [[COND_FALSE15]] ] -// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP50]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// 
CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[TMP51]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP55]], 0 -// CHECK1-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP56:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP57:%.*]] = icmp ne i32 [[TMP56]], 0 -// CHECK1-NEXT: br i1 [[TMP57]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP58]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: [[TMP59:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP59]]) -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK1: omp.dispatch.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK1-NEXT: 
br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK1: omp.dispatch.body: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 
x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK1: omp.dispatch.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK1: omp.dispatch.end: -// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// 
CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK1-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK1: .omp.lastprivate.then: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 -// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK1: .omp.lastprivate.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 -// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I3:%.*]] = alloca i32, 
align 4 -// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK1-NEXT: store i32 
[[TMP18]], i32* [[CONV6]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: 
[[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK1: cond.true11: -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: br label [[COND_END13:%.*]] -// CHECK1: cond.false12: -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END13]] -// CHECK1: cond.end13: -// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// 
CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 -// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD17]], i32* [[I3]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK1: omp.precond.then: -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK1-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: br label [[OMP_PRECOND_END]] -// CHECK1: omp.precond.end: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK1-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: 
store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store 
i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// 
CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK1: cond.true5: -// CHECK1-NEXT: br label [[COND_END7:%.*]] -// CHECK1: cond.false6: -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END7]] -// CHECK1: cond.end7: -// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) 
#[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 
8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], 
align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK1-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: [[TMP2:%.*]] = load 
i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store 
i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** 
[[TMP21]], i64 4) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK1: cond.true7: -// CHECK1-NEXT: br label [[COND_END9:%.*]] -// CHECK1: cond.false8: -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: br label [[COND_END9]] -// CHECK1: cond.end9: -// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// 
CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// 
CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// 
CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK1-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK1: omp.body.continue: -// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] -// CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] -// CHECK1: omp.inner.for.end: -// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK1: omp.loop.exit: -// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK1: .omp.final.then: -// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK1-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK1: .omp.final.done: -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* 
[[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// 
CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[L2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ 
[[TMP12]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[L_CASTED]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** 
[[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i64 [[TMP21]] to i8* -// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP31:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 -// CHECK2-NEXT: [[TMP33:%.*]] = inttoptr i64 [[TMP23]] to i8* -// CHECK2-NEXT: store i8* [[TMP33]], i8** [[TMP32]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[TMP34]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP35]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP36]], i64 5) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP43]], [[TMP44]] -// CHECK2-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] -// CHECK2: cond.true14: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 -// CHECK2-NEXT: br label [[COND_END16:%.*]] -// CHECK2: cond.false15: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END16]] -// CHECK2: cond.end16: -// CHECK2-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP45]], [[COND_TRUE14]] ], [ [[TMP46]], [[COND_FALSE15]] ] -// CHECK2-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP47]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP48]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP49]]) -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 -// CHECK2-NEXT: br i1 [[TMP51]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP52]], 0 -// CHECK2-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 -// 
CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 -// CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 -// CHECK2-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[CONV1]], align 8 -// CHECK2-NEXT: store i32 [[TMP55]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK2: .omp.lastprivate.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 -// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I6:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 
-// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK2: omp.dispatch.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 -// CHECK2-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 
4 -// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK2: omp.dispatch.body: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK2-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label 
[[OMP_DISPATCH_INC:%.*]] -// CHECK2: omp.dispatch.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK2: omp.dispatch.end: -// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK2-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 -// CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 -// CHECK2-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] -// CHECK2-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK2: .omp.lastprivate.then: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 8 -// 
CHECK2-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 -// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK2: .omp.lastprivate.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 -// CHECK2-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: br label 
[[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], 
i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* -// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 
-// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* -// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 -// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] 
-// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK2: cond.true11: -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: br label [[COND_END13:%.*]] -// CHECK2: cond.false12: -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END13]] -// CHECK2: cond.end13: -// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] -// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 -// CHECK2-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 -// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD17]], i32* [[I3]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define 
{{[^@]+}}@__omp_outlined__3 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I5:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* -// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], 
i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK2: omp.precond.then: -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] -// CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK2-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 -// CHECK2-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 -// CHECK2-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK2-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK2-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 -// CHECK2-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 -// CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] -// CHECK2-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: br label [[OMP_PRECOND_END]] -// CHECK2: omp.precond.end: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK2-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* 
[[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* 
[[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = 
inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 -// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK2: cond.true5: -// CHECK2-NEXT: br label [[COND_END7:%.*]] -// CHECK2: cond.false6: -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label 
[[COND_END7]] -// CHECK2: cond.end7: -// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] -// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 -// CHECK2-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 -// CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK2-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* 
[[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: 
.omp.final.done: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK2-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: 
ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = 
call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 -// CHECK2-NEXT: 
[[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* -// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 -// CHECK2-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 
-// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 -// CHECK2-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] -// CHECK2: cond.true7: -// CHECK2-NEXT: br label [[COND_END9:%.*]] -// CHECK2: cond.false8: -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: br label [[COND_END9]] -// CHECK2: cond.end9: -// CHECK2-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] -// CHECK2-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 
[[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 -// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 -// 
CHECK2-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 -// CHECK2-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 -// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 -// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] -// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 -// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] -// CHECK2-NEXT: 
[[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 -// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK2-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 -// CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] -// CHECK2-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 -// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK2: omp.body.continue: -// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] -// CHECK2: omp.inner.for.end: -// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK2: omp.loop.exit: -// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IS_LAST]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK2: .omp.final.then: -// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK2-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK2: .omp.final.done: -// CHECK2-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 
4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 1, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP7]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = 
load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP11]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP14]], [[COND_TRUE]] ], [ [[TMP15]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP17]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP21]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP23]], i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP26:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK3-NEXT: store i8* [[TMP26]], i8** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP28:%.*]] = inttoptr i32 [[TMP20]] to i8* -// CHECK3-NEXT: store i8* [[TMP28]], i8** [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP30:%.*]] = inttoptr i32 [[TMP22]] to i8* -// CHECK3-NEXT: store i8* [[TMP30]], i8** [[TMP29]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP32:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP32]], i8** [[TMP31]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 
-// CHECK3-NEXT: [[TMP34:%.*]] = inttoptr i32 [[TMP24]] to i8* -// CHECK3-NEXT: store i8* [[TMP34]], i8** [[TMP33]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[TMP35]], align 4 -// CHECK3-NEXT: [[TMP37:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP36]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP37]], i32 5) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP38]], [[TMP39]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP40]], [[TMP41]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP44]], [[TMP45]] -// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK3: cond.true11: -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK3-NEXT: br label [[COND_END13:%.*]] -// 
CHECK3: cond.false12: -// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END13]] -// CHECK3: cond.end13: -// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP46]], [[COND_TRUE11]] ], [ [[TMP47]], [[COND_FALSE12]] ] -// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP48]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP49:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[TMP49]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP50]]) -// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 -// CHECK3-NEXT: br i1 [[TMP52]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP53]], 0 -// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = icmp ne i32 [[TMP54]], 0 -// CHECK3-NEXT: br i1 [[TMP55]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP56:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 
[[TMP56]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: [[TMP57:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP57]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* 
[[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK3: omp.dispatch.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK3: omp.dispatch.body: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul 
nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK3: omp.dispatch.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK3: omp.dispatch.end: -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 
-// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK3: .omp.lastprivate.then: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK3: .omp.lastprivate.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 -// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: 
[[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: 
[[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[TMP5]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[N_CASTED]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK3: cond.true10: -// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: br label [[COND_END12:%.*]] -// CHECK3: cond.false11: -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END12]] -// CHECK3: cond.end12: -// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label 
[[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* 
[[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK3: omp.precond.then: -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: 
call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP14:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: br label [[OMP_PRECOND_END]] -// CHECK3: omp.precond.end: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK3-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label 
[[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], 
align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** 
[[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK3: cond.true5: -// 
CHECK3-NEXT: br label [[COND_END7:%.*]] -// CHECK3: cond.false6: -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END7]] -// CHECK3: cond.end7: -// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca 
i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], 
align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK3-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 
dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = 
inttoptr i32 [[TMP8]] to i8* -// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK3-NEXT: br i1 
[[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK3: cond.true6: -// CHECK3-NEXT: br label [[COND_END8:%.*]] -// CHECK3: cond.false7: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: br label [[COND_END8]] -// CHECK3: cond.end8: -// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x 
[10 x i32]]*, align 4 -// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, 
i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 
4 -// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 -// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK3: omp.body.continue: -// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] -// CHECK3: omp.inner.for.end: -// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK3: omp.loop.exit: -// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK3: .omp.final.then: -// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK3-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK3: .omp.final.done: -// CHECK3-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK4-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK4-NEXT: entry: @@ -3350,8 +93,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT:%.*]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define 
{{[^@]+}}@__omp_outlined__ // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3517,8 +258,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP57:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP57]]) // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3656,8 +395,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK4-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3686,8 +423,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT:%.*]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3828,8 +563,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) 
#[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3930,8 +663,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[OMP_PRECOND_END]] // CHECK4: omp.precond.end: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK4-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -3954,8 +685,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT:%.*]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4054,8 +783,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4: .omp.final.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4127,8 +854,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4: .omp.final.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK4-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4157,8 +882,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT:%.*]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4270,8 +993,6 @@ int bar(int n){ 
// CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4: .omp.final.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4366,8 +1087,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK4: .omp.final.done: // CHECK4-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK5-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: @@ -4402,8 +1121,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -4564,8 +1281,6 @@ int bar(int n){ // CHECK5: omp.precond.end: // CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -4703,8 +1418,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK5-SAME: (i32 [[N:%.*]], 
[1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -4733,8 +1446,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -4875,8 +1586,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -4977,8 +1686,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[OMP_PRECOND_END]] // CHECK5: omp.precond.end: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK5-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5001,8 +1708,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5101,8 +1806,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 
[[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5174,8 +1877,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK5-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5204,8 +1905,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTEXIT:%.*]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5317,8 +2016,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -5413,8 +2110,6 @@ int bar(int n){ // CHECK5-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK5: .omp.final.done: // CHECK5-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK6-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK6-NEXT: entry: @@ -5449,8 +2144,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -5611,8 +2304,6 @@ int bar(int n){ // CHECK6: omp.precond.end: // CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -5750,8 +2441,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK6-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -5780,8 +2469,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -5922,8 +2609,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6024,8 +2709,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[OMP_PRECOND_END]] // CHECK6: omp.precond.end: // CHECK6-NEXT: 
ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK6-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6048,8 +2731,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6148,8 +2829,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6221,8 +2900,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK6-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6251,8 +2928,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTEXIT:%.*]] // CHECK6: .exit: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6364,8 +3039,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void -// -// // CHECK6-LABEL: define 
{{[^@]+}}@__omp_outlined__7 // CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK6-NEXT: entry: @@ -6460,8 +3133,6 @@ int bar(int n){ // CHECK6-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK6: .omp.final.done: // CHECK6-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK7-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK7-NEXT: entry: @@ -6500,8 +3171,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -6673,8 +3342,6 @@ int bar(int n){ // CHECK7-NEXT: [[TMP59:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP59]]) // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -6820,8 +3487,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK7-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) 
[[AA:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -6852,8 +3517,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -6998,8 +3661,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7105,8 +3766,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[OMP_PRECOND_END]] // CHECK7: omp.precond.end: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK7-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7129,8 +3788,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7231,8 +3888,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7308,8 +3963,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK7-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7340,8 +3993,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTEXIT:%.*]] // CHECK7: .exit: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7457,8 +4108,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void -// -// // CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK7-NEXT: entry: @@ -7559,8 +4208,6 @@ int bar(int n){ // CHECK7-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK7: .omp.final.done: // CHECK7-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK8-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK8-NEXT: entry: @@ -7599,8 +4246,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* 
nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -7767,8 +4412,6 @@ int bar(int n){ // CHECK8: omp.precond.end: // CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -7914,8 +4557,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK8-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -7946,8 +4587,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8092,8 +4731,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8199,8 +4836,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[OMP_PRECOND_END]] // CHECK8: omp.precond.end: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK8-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8223,8 +4858,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8325,8 +4958,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8402,8 +5033,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK8-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8434,8 +5063,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTEXIT:%.*]] // CHECK8: .exit: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8551,8 +5178,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: // CHECK8-NEXT: ret void -// -// // CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK8-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { // CHECK8-NEXT: entry: @@ -8653,8 +5278,6 @@ int bar(int n){ // CHECK8-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK8: .omp.final.done: // CHECK8-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK9-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK9-NEXT: entry: @@ -8689,8 +5312,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -8856,8 +5477,6 @@ int bar(int n){ // CHECK9-NEXT: [[TMP57:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK9-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP57]]) // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -8995,8 +5614,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK9-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9025,8 +5642,6 
@@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9167,8 +5782,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9269,8 +5882,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[OMP_PRECOND_END]] // CHECK9: omp.precond.end: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK9-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9293,8 +5904,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9393,8 +6002,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9466,8 
+6073,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK9-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9496,8 +6101,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTEXIT:%.*]] // CHECK9: .exit: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9609,8 +6212,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void -// -// // CHECK9-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK9-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK9-NEXT: entry: @@ -9705,8 +6306,6 @@ int bar(int n){ // CHECK9-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK9: .omp.final.done: // CHECK9-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK10-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK10-NEXT: entry: @@ -9741,8 +6340,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) 
#[[ATTR0]] { // CHECK10-NEXT: entry: @@ -9908,8 +6505,6 @@ int bar(int n){ // CHECK10-NEXT: [[TMP57:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK10-NEXT: call void @__kmpc_restore_team_static_memory(i16 1, i16 [[TMP57]]) // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__1 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10047,8 +6642,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 // CHECK10-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10077,8 +6670,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__2 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10219,8 +6810,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10321,8 +6910,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[OMP_PRECOND_END]] // CHECK10: omp.precond.end: // CHECK10-NEXT: ret void 
-// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 // CHECK10-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10345,8 +6932,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__4 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10445,8 +7030,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__5 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10518,8 +7101,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 // CHECK10-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10548,8 +7129,6 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTEXIT:%.*]] // CHECK10: .exit: // CHECK10-NEXT: ret void -// -// // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__6 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10660,9 +7239,7 @@ int bar(int n){ // CHECK10-NEXT: store i32 10, i32* [[J]], align 4 // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: -// 
CHECK10-NEXT: ret void -// -// +// CHECK10-NEXT: ret void // CHECK10-LABEL: define {{[^@]+}}@__omp_outlined__7 // CHECK10-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { // CHECK10-NEXT: entry: @@ -10757,2098 +7334,5233 @@ int bar(int n){ // CHECK10-NEXT: br label [[DOTOMP_FINAL_DONE]] // CHECK10: .omp.final.done: // CHECK10-NEXT: ret void -// -// // CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 // CHECK11-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { // CHECK11-NEXT: entry: -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 
[[NVPTX_NUM_THREADS]], i16 0) +// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK11: .execute: +// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK11: .omp.deinit: +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK11-NEXT: br label [[DOTEXIT:%.*]] +// CHECK11: .exit: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] 
= alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK11-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK11-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: 
omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK11-NEXT: [[CMP6:%.*]] = 
icmp slt i32 [[TMP14]], [[ADD]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* +// CHECK11-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK11-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK11-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK11-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK11-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* +// CHECK11-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load 
i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK11: cond.true11: +// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK11-NEXT: br label [[COND_END13:%.*]] +// CHECK11: cond.false12: +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END13]] 
+// CHECK11: cond.end13: +// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] +// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK11-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 +// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK11-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11: .omp.lastprivate.then: +// CHECK11-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK11: 
.omp.lastprivate.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: 
store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK11-NEXT: br label 
[[OMP_DISPATCH_COND:%.*]] +// CHECK11: omp.dispatch.cond: +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK11: omp.dispatch.body: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// 
CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK11-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK11: omp.dispatch.inc: +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK11: omp.dispatch.end: +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK11-NEXT: br i1 [[TMP29]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK11-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK11: .omp.lastprivate.then: +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK11: .omp.lastprivate.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK11-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// 
CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK11: .execute: +// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK11: .omp.deinit: +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK11-NEXT: br label [[DOTEXIT:%.*]] +// CHECK11: .exit: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// 
CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* 
[[N_CASTED]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK11-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK11-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK11-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK11-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 
+// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK11: cond.true10: +// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: br label [[COND_END12:%.*]] +// CHECK11: cond.false11: +// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END12]] +// CHECK11: cond.end12: +// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK11-NEXT: br i1 [[TMP43]], 
label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 +// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], 
i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK11: omp.precond.then: +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK11-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK11-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], 
[[TMP16]] +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK11-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: br label [[OMP_PRECOND_END]] +// CHECK11: omp.precond.end: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// CHECK11-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK11: .execute: +// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK11-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK11: .omp.deinit: +// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK11-NEXT: br label [[DOTEXIT:%.*]] +// CHECK11: .exit: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x 
i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK11-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK11-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK11-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK11: cond.true5: +// CHECK11-NEXT: br label [[COND_END7:%.*]] +// CHECK11: cond.false6: +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END7]] +// CHECK11: cond.end7: +// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = 
alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK11-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = icmp ne 
i32 [[TMP13]], 0 +// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 +// CHECK11-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 // CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 // CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 // CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() // CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) // CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] // CHECK11: .execute: -// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// 
CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 // CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK11-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK11-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] // CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] // CHECK11: .omp.deinit: // CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) // CHECK11-NEXT: br label [[DOTEXIT:%.*]] // CHECK11: .exit: // CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK11: cond.true: +// CHECK11-NEXT: br label [[COND_END:%.*]] +// CHECK11: cond.false: +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END]] +// CHECK11: cond.end: +// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, 
[[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK11-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK11-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK11-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK11-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK11-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK11-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK11-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK11-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK11-NEXT: [[TMP19:%.*]] = bitcast [4 x 
i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK11: cond.true6: +// CHECK11-NEXT: br label [[COND_END8:%.*]] +// CHECK11: cond.false7: +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: br label [[COND_END8]] +// CHECK11: cond.end8: +// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 
+// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: ret void +// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK11-NEXT: entry: +// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 +// 
CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK11: omp.inner.for.cond: +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK11-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// 
CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK11: omp.inner.for.body: +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK11-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK11-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK11-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK11-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK11-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK11: omp.body.continue: +// CHECK11-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] +// CHECK11: omp.inner.for.inc: +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11: omp.inner.for.end: +// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK11: omp.loop.exit: +// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK11: .omp.final.then: +// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK11: .omp.final.done: +// CHECK11-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 +// CHECK12-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: store 
i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK12: .execute: +// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK12: .omp.deinit: +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK12-NEXT: br label [[DOTEXIT:%.*]] +// CHECK12: .exit: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// 
CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK12-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK12-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 0, i32* 
[[I]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: 
omp.inner.for.cond: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 +// CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] +// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* +// CHECK12-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK12-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK12-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK12-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK12-NEXT: 
[[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK12-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* +// CHECK12-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK12-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK12: cond.true11: +// 
CHECK12-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK12-NEXT: br label [[COND_END13:%.*]] +// CHECK12: cond.false12: +// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END13]] +// CHECK12: cond.end13: +// CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] +// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK12-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK12-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 +// CHECK12-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK12-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label 
[[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12: .omp.lastprivate.then: +// CHECK12-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK12: .omp.lastprivate.done: +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK12-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK12: omp.dispatch.cond: +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK12: omp.dispatch.body: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK12-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK12: omp.dispatch.inc: +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK12: omp.dispatch.end: +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load 
i32, i32* [[TMP26]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK12-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK12-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK12-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK12-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK12: .omp.lastprivate.then: +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK12: .omp.lastprivate.done: +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 +// CHECK12-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// 
CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK12: .execute: +// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK12: .omp.deinit: +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK12-NEXT: br label [[DOTEXIT:%.*]] +// CHECK12: .exit: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: 
[[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: 
store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK12-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK12-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK12-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK12-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4 +// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK12-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK12: cond.true10: +// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: br label [[COND_END12:%.*]] +// CHECK12: cond.false11: +// CHECK12-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END12]] +// CHECK12: cond.end12: +// CHECK12-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK12-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP41:%.*]] = load i32, 
i32* [[TMP40]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK12-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK12-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 +// CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, 
align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK12: omp.precond.then: +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], 
i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK12-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK12-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK12-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK12-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK12-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK12-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK12-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK12-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK12-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK12-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: br label [[OMP_PRECOND_END]] +// CHECK12: omp.precond.end: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 +// CHECK12-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, 
align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK12: .execute: +// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK12: .omp.deinit: +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK12-NEXT: br label [[DOTEXIT:%.*]] +// CHECK12: .exit: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK12-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK12-NEXT: 
br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK12-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK12-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK12-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK12: cond.true5: +// CHECK12-NEXT: br label [[COND_END7:%.*]] +// CHECK12: cond.false6: +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END7]] +// CHECK12: cond.end7: +// CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK12-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 
dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], 
align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK12-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK12-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 +// CHECK12-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK12: .execute: +// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK12-NEXT: store i32 
[[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK12-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK12: .omp.deinit: +// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK12-NEXT: br label [[DOTEXIT:%.*]] +// CHECK12: .exit: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: 
[[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK12-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK12: cond.true: +// CHECK12-NEXT: br label [[COND_END:%.*]] +// CHECK12: cond.false: +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END]] +// CHECK12: cond.end: +// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// 
CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK12-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK12-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK12-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK12-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK12-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK12-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK12-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK12-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK12-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP22:%.*]] = load i32, 
i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK12: cond.true6: +// CHECK12-NEXT: br label [[COND_END8:%.*]] +// CHECK12: cond.false7: +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: br label [[COND_END8]] +// CHECK12: cond.end8: +// CHECK12-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK12-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK12-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK12-NEXT: br label 
[[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: ret void +// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK12-NEXT: entry: +// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK12-NEXT: store i32 0, i32* 
[[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK12-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK12: omp.inner.for.cond: +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK12-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK12: omp.inner.for.body: +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK12-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// 
CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK12-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK12-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK12-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK12-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK12-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK12: omp.body.continue: +// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK12: omp.inner.for.inc: +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12: omp.inner.for.end: +// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK12: omp.loop.exit: +// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// 
CHECK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK12-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK12: .omp.final.then: +// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK12: .omp.final.done: +// CHECK12-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26 +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[N_CASTED]] 
to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV2]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i32]* [[TMP0]], i64 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 
-// CHECK11-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK11-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK11-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], 
align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* -// CHECK11-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK11-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK11-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK11-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK11-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: 
call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK11-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK11: cond.true11: -// CHECK11-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK11-NEXT: br label [[COND_END13:%.*]] -// CHECK11: cond.false12: -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END13]] -// CHECK11: cond.end13: -// CHECK11-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] -// CHECK11-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK11-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) -// CHECK11-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK11-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP50]], 0 -// CHECK11-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 -// CHECK11-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK11-NEXT: ret void +// CHECK1-LABEL: 
define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[L_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK1-NEXT: [[L2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: 
[[L_ON_STACK:%.*]] = bitcast i8* [[L2]] to i32* +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB4:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB4]], i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV8]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: [[CONV9:%.*]] = bitcast i64* [[L_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV9]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[L_CASTED]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = 
getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP27:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 8 +// CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 4 +// CHECK1-NEXT: [[TMP31:%.*]] = inttoptr i64 [[TMP21]] to i8* +// CHECK1-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 8 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i32]*, i64)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i64 5) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// 
CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] +// CHECK1-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] +// CHECK1: cond.true14: +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK1-NEXT: br label [[COND_END16:%.*]] +// CHECK1: cond.false15: +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END16]] +// CHECK1: cond.end16: +// CHECK1-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE14]] ], [ [[TMP44]], [[COND_FALSE15]] ] +// CHECK1-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 +// CHECK1-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// 
CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB18:%.*]] = sub nsw i32 [[TMP50]], 0 +// CHECK1-NEXT: [[DIV19:%.*]] = sdiv i32 [[SUB18]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV19]], 1 +// CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD20]], i32* [[I5]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 +// CHECK1-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1: .omp.lastprivate.then: +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP53]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK1: .omp.lastprivate.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[L2]]) +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK11-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// 
CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK11-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK11: omp.dispatch.cond: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK11: omp.dispatch.body: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK11-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK11: omp.body.continue: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK11: omp.dispatch.inc: -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK11-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK11: omp.dispatch.end: -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK11-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK11-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK11-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK11-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK11-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK11-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK11: .omp.lastprivate.then: -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK11: .omp.lastprivate.done: -// CHECK11-NEXT: br label 
[[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i64 [[L:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 8 +// CHECK1-NEXT: [[L_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I6:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[L]], i64* [[L_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = 
bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[L_ADDR]] to i32* +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV5:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV5]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], 
i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK1: omp.dispatch.cond: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp ugt i64 [[CONV7]], [[TMP10]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CONV9:%.*]] = sext i32 [[TMP12]] to i64 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i64 [ [[TMP11]], [[COND_TRUE]] ], [ [[CONV9]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i64 [[COND]] to i32 +// CHECK1-NEXT: store i32 [[CONV10]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP11:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK1: omp.dispatch.body: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[CMP12:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK1-NEXT: br i1 [[CMP12]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK1-NEXT: store i32 [[TMP20]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK1: omp.dispatch.inc: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK1: omp.dispatch.end: +// CHECK1-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// 
CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK1-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB16:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK1-NEXT: [[DIV17:%.*]] = sdiv i32 [[SUB16]], 1 +// CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i32 [[DIV17]], 1 +// CHECK1-NEXT: [[ADD19:%.*]] = add nsw i32 0, [[MUL18]] +// CHECK1-NEXT: store i32 [[ADD19]], i32* [[I6]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK1: .omp.lastprivate.then: +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV1]], align 8 +// CHECK1-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK1: .omp.lastprivate.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 -// CHECK11-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], 
align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK11: .execute: -// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK11-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK11: .omp.deinit: -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK11-NEXT: br label [[DOTEXIT:%.*]] -// CHECK11: .exit: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 +// CHECK1-SAME: (i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] 
= bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i64 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: 
[[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK1-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = zext i32 [[TMP14]] to i64 +// 
CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP18]], i32* [[CONV6]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i64, i64* [[N_CASTED]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP21:%.*]] = inttoptr i64 [[TMP15]] to i8* +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = inttoptr i64 [[TMP17]] to i8* +// CHECK1-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP25:%.*]] = inttoptr i64 [[TMP19]] to i8* +// CHECK1-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 8 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP28]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP29]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, i64, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP30]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK1: cond.true11: +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: br label [[COND_END13:%.*]] +// CHECK1: cond.false12: +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END13]] +// CHECK1: cond.end13: +// CHECK1-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP39]], [[COND_TRUE11]] ], [ [[TMP40]], [[COND_FALSE12]] ] +// CHECK1-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP42:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[TMP42]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP43]]) +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 +// CHECK1-NEXT: br i1 [[TMP45]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP46]], 0 +// CHECK1-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK1-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD17]], i32* [[I3]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: 
[[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK11-NEXT: 
call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], 
align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK11-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK11-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK11-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK11-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK11-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK11: cond.true10: -// CHECK11-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: br label [[COND_END12:%.*]] -// CHECK11: cond.false11: -// CHECK11-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END12]] -// CHECK11: cond.end12: -// CHECK11-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK11-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK11-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK11-NEXT: br i1 [[TMP43]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK11-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK11-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], i64 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[N_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I5:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], 
align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 +// CHECK1-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32* +// CHECK1-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK1-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK1-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK1: omp.precond.then: +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP5]] to i32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i64 [[TMP6]] to i32 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV4]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i32 [[TMP10]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP7:%.*]] = icmp ule i64 [[CONV6]], [[TMP11]] +// CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: [[CONV8:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[CONV8]], 1 +// CHECK1-NEXT: [[CONV10:%.*]] = trunc i32 [[ADD9]] to i16 +// CHECK1-NEXT: store i16 [[CONV10]], i16* [[ARRAYIDX]], align 2 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: 
omp.inner.for.inc: +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK1-NEXT: [[SUB12:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK1-NEXT: [[DIV13:%.*]] = sdiv i32 [[SUB12]], 1 +// CHECK1-NEXT: [[MUL14:%.*]] = mul nsw i32 [[DIV13]], 1 +// CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 0, [[MUL14]] +// CHECK1-NEXT: store i32 [[ADD15]], i32* [[I5]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: br label [[OMP_PRECOND_END]] +// CHECK1: omp.precond.end: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 
-// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK11-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK11-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK11: omp.precond.then: -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK11-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], 
i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK11-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK11-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK11-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK11: omp.body.continue: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK11-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK11-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK11-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK11-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK11-NEXT: 
store i32 [[ADD11]], i32* [[I3]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: br label [[OMP_PRECOND_END]] -// CHECK11: omp.precond.end: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 +// CHECK1-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK11-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = 
alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK11: .execute: -// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK11-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK11: .omp.deinit: -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK11-NEXT: br label [[DOTEXIT:%.*]] -// CHECK11: .exit: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** 
[[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK1-NEXT: br i1 [[CMP1]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP17]], i64 3) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK1-NEXT: store i32 
[[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP24]], 9 +// CHECK1-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK1: cond.true5: +// CHECK1-NEXT: br label [[COND_END7:%.*]] +// CHECK1: cond.false6: +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END7]] +// CHECK1: cond.end7: +// CHECK1-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP25]], [[COND_FALSE6]] ] +// CHECK1-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = icmp ne i32 [[TMP27]], 0 +// CHECK1-NEXT: br i1 [[TMP28]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) 
#[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label 
[[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK11-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK11-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK11-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* 
bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK11-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK11: cond.true5: -// CHECK11-NEXT: br label [[COND_END7:%.*]] -// CHECK11: cond.false6: -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END7]] -// CHECK11: cond.end7: -// CHECK11-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK11-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// 
CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK11-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 8 +// 
CHECK1-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV1]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV2:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV2]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], 
align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK1-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: 
[[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// 
CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK11: omp.body.continue: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// 
CHECK11-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK11-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// CHECK1-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[CONV1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], 
i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i64 [[TMP3]]) #[[ATTR2]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK11-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK11-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK11: .execute: -// CHECK11-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK11-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK11-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK11: 
.omp.deinit: -// CHECK11-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK11-NEXT: br label [[DOTEXIT:%.*]] -// CHECK11: .exit: -// CHECK11-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[F_CASTED:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 
+// CHECK1-NEXT: [[CONV3:%.*]] = bitcast i64* [[F_CASTED]] to i32* +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV3]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[F_CASTED]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = inttoptr i64 [[TMP8]] to i8* +// CHECK1-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP16:%.*]] = inttoptr i64 [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 2 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK1-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 3 +// CHECK1-NEXT: [[TMP20:%.*]] = inttoptr i64 [[TMP12]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP19]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i64, i64, [10 x [10 x i32]]*, i64)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP21]], i64 4) +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], 
align 4 +// CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[CMP6:%.*]] = icmp sgt i32 [[TMP28]], 99 +// CHECK1-NEXT: br i1 [[CMP6]], label [[COND_TRUE7:%.*]], label [[COND_FALSE8:%.*]] +// CHECK1: cond.true7: +// CHECK1-NEXT: br label [[COND_END9:%.*]] +// CHECK1: cond.false8: +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: br label [[COND_END9]] +// CHECK1: cond.end9: +// CHECK1-NEXT: [[COND10:%.*]] = phi i32 [ 99, [[COND_TRUE7]] ], [ [[TMP29]], [[COND_FALSE8]] ] +// CHECK1-NEXT: store i32 [[COND10]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK1-NEXT: br i1 [[TMP32]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__7 
+// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTPREVIOUS_LB_:%.*]], i64 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i64 [[F:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 8 +// CHECK1-NEXT: [[F_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_LB_]], i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: store i64 [[DOTPREVIOUS_UB_]], i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[F]], i64* [[F_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[F_ADDR]] to i32* +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: 
[[TMP1:%.*]] = load i64, i64* [[DOTPREVIOUS_LB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV2:%.*]] = trunc i64 [[TMP1]] to i32 +// CHECK1-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i64 [[TMP2]] to i32 +// CHECK1-NEXT: store i32 [[CONV2]], i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[CONV3]], i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK1-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK1: omp.inner.for.cond: +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i32 [[TMP6]] to i64 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTPREVIOUS_UB__ADDR]], align 8 +// CHECK1-NEXT: [[CMP:%.*]] = icmp ule i64 [[CONV4]], [[TMP7]] +// CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK1: omp.inner.for.body: +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[DIV5:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK1-NEXT: 
[[MUL6:%.*]] = mul nsw i32 [[DIV5]], 10 +// CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL6]] +// CHECK1-NEXT: [[MUL7:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 0, [[MUL7]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[J]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[MUL9:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK1-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP11]], [[MUL9]] +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK1-NEXT: [[ADD11:%.*]] = add nsw i32 [[ADD10]], [[TMP14]] +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 +// CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i64 0, i64 [[IDXPROM]] +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK1-NEXT: [[IDXPROM12:%.*]] = sext i32 [[TMP16]] to i64 +// CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i64 0, i64 [[IDXPROM12]] +// CHECK1-NEXT: store i32 [[ADD11]], i32* [[ARRAYIDX13]], align 4 +// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK1: omp.body.continue: +// CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK1: omp.inner.for.inc: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1: omp.inner.for.end: +// CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK1: omp.loop.exit: +// CHECK1-NEXT: call void 
@__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK1-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK1: .omp.final.then: +// CHECK1-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK1-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK1-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK1: .omp.final.done: +// CHECK1-NEXT: ret void // // -// CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x 
i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK11-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK11-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK11-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK11: cond.true: -// CHECK11-NEXT: br label [[COND_END:%.*]] -// CHECK11: cond.false: -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END]] -// CHECK11: cond.end: -// CHECK11-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK11-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// 
CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK11-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK11-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK11-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK11-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK11-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK11-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK11-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] 
-// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK11-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK11: cond.true6: -// CHECK11-NEXT: br label [[COND_END8:%.*]] -// CHECK11: cond.false7: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: br label [[COND_END8]] -// CHECK11: cond.end8: -// CHECK11-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK11-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK11-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK11-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 
10, i32* [[I]], align 4 -// CHECK11-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26 +// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], 
align 4 +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// 
CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: 
[[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK2-NEXT: store i8* 
[[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK2-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK2-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK2-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK2: cond.true11: +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK2-NEXT: br label [[COND_END13:%.*]] +// CHECK2: cond.false12: +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END13]] +// CHECK2: cond.end13: +// CHECK2-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK2-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK2-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], 
label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK2-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK2-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK2-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2: .omp.lastprivate.then: +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP51]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK2: .omp.lastprivate.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK2-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 
+// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load 
i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK2-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK2: omp.dispatch.cond: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK2: omp.dispatch.body: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK2-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK2: omp.dispatch.inc: +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 
[[TMP23]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK2: omp.dispatch.end: +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK2-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK2-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK2-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK2: .omp.lastprivate.then: +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK2: .omp.lastprivate.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// 
CHECK11-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK11-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK11-NEXT: entry: -// CHECK11-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK11-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK11-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// 
CHECK11-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK11-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK11-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK11-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK11-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK11-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK11-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK11-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK11-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK11-NEXT: [[MUL4:%.*]] = mul 
nsw i32 [[SUB]], 1 -// CHECK11-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK11-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK11-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK11-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK11-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 -// CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK11: omp.body.continue: -// CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK11-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK11-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] -// CHECK11: omp.inner.for.end: -// CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK11: omp.loop.exit: -// CHECK11-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK11-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK11-NEXT: br i1 [[TMP20]], label 
[[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK11: .omp.final.then: -// CHECK11-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK11-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK11-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK11: .omp.final.done: -// CHECK11-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 +// CHECK2-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void 
@__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: 
[[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], 
[[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK2-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* 
[[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK2: cond.true10: +// CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: br label [[COND_END12:%.*]] +// CHECK2: cond.false11: +// CHECK2-NEXT: [[TMP38:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END12]] +// CHECK2: cond.end12: +// CHECK2-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK2-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK2-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK2-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 +// CHECK2-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// 
CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK2-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK2-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: 
store i32 0, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK2: omp.precond.then: +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK2-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK2-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK2-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK2-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK2-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] 
+// CHECK2-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: br label [[OMP_PRECOND_END]] +// CHECK2: omp.precond.end: +// CHECK2-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l38 -// CHECK12-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK12: .execute: -// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[L_CASTED]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK12-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] -// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK12: .omp.deinit: -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK12-NEXT: br label [[DOTEXIT:%.*]] -// CHECK12: .exit: -// CHECK12-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 +// CHECK2-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* 
noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I4:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK12-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK12-NEXT: [[L1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], 
%struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP3]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP4]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP5]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP8]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP9]], [[TMP10]] -// CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: br label [[COND_END:%.*]] -// 
CHECK12: cond.false: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK12-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] -// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[L_CASTED]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[L_CASTED]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP16]] to i8* -// CHECK12-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK12-NEXT: store i8* 
[[TMP25]], i8** [[TMP24]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP27:%.*]] = inttoptr i32 [[TMP19]] to i8* -// CHECK12-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP29:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* -// CHECK12-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 -// CHECK12-NEXT: [[TMP30:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 -// CHECK12-NEXT: [[TMP31:%.*]] = inttoptr i32 [[TMP21]] to i8* -// CHECK12-NEXT: store i8* [[TMP31]], i8** [[TMP30]], align 4 -// CHECK12-NEXT: [[TMP32:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP32]], align 4 -// CHECK12-NEXT: [[TMP34:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP33]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP34]], i32 5) -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// 
CHECK12-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP39]], [[TMP40]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP41]], [[TMP42]] -// CHECK12-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] -// CHECK12: cond.true11: -// CHECK12-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 -// CHECK12-NEXT: br label [[COND_END13:%.*]] -// CHECK12: cond.false12: -// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END13]] -// CHECK12: cond.end13: -// CHECK12-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP43]], [[COND_TRUE11]] ], [ [[TMP44]], [[COND_FALSE12]] ] -// CHECK12-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP45]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP47:%.*]] = load i32, i32* [[TMP46]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]]) -// CHECK12-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP49:%.*]] = icmp ne i32 [[TMP48]], 0 -// CHECK12-NEXT: br i1 [[TMP49]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB15:%.*]] = sub nsw i32 
[[TMP50]], 0 -// CHECK12-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 -// CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP52:%.*]] = icmp ne i32 [[TMP51]], 0 -// CHECK12-NEXT: br i1 [[TMP52]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP53:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP53]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK12: .omp.lastprivate.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK12-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], 
align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = 
load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK2-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK2: cond.true5: +// CHECK2-NEXT: br label [[COND_END7:%.*]] +// CHECK2: cond.false6: +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END7]] +// CHECK2: cond.end7: +// CHECK2-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK2-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK2-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: 
[[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 -// CHECK12-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: 
[[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) -// CHECK12-NEXT: br label [[OMP_DISPATCH_COND:%.*]] -// CHECK12: omp.dispatch.cond: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] -// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: br label [[COND_END:%.*]] -// CHECK12: cond.false: -// CHECK12-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] -// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] -// CHECK12: omp.dispatch.body: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] -// CHECK12-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK12-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK12: omp.body.continue: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] -// CHECK12: omp.dispatch.inc: -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK12-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: br label [[OMP_DISPATCH_COND]] -// CHECK12: omp.dispatch.end: -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 -// CHECK12-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 -// CHECK12-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 -// CHECK12-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 -// CHECK12-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] -// CHECK12-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: 
[[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 -// CHECK12-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] -// CHECK12: .omp.lastprivate.then: -// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] -// CHECK12: .omp.lastprivate.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store [10 x i32]* 
[[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// 
CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK2-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// CHECK2-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x 
i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l44 -// CHECK12-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** 
[[AA_ADDR]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK12: .execute: -// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK12-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] -// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK12: .omp.deinit: -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK12-NEXT: br label [[DOTEXIT:%.*]] -// CHECK12: .exit: -// CHECK12-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__6 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// 
CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = 
load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK2-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK2-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK2-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK2-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void 
(i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK2-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK2: cond.true6: +// CHECK2-NEXT: br label [[COND_END8:%.*]] +// CHECK2: cond.false7: +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: br label [[COND_END8]] +// CHECK2: cond.end8: +// CHECK2-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK2-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// 
CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK2-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK2-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 
+// CHECK2-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK2-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK2: omp.inner.for.cond: +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK2-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK2: omp.inner.for.body: +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// 
CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK2-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK2-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK2-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK2: omp.body.continue: +// CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK2: omp.inner.for.inc: +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[ADD10]], 
i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2: omp.inner.for.end: +// CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK2: omp.loop.exit: +// CHECK2-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK2-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK2: .omp.final.then: +// CHECK2-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK2-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK2-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK2: .omp.final.done: +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l26 +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label 
[[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i32]* [[TMP0]], i32 [[TMP5]]) #[[ATTR2:[0-9]+]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_2:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, 
align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I4:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[L_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [5 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[L1:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[L_ON_STACK:%.*]] = bitcast i8* [[L1]] to i32* +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB3:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB3]], i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// 
CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 128) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP18]], i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[L_CASTED]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP25:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast [1000 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP27]], i8** [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [5 x i8*], [5 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 4 +// CHECK3-NEXT: [[TMP29:%.*]] = inttoptr i32 [[TMP19]] to i8* +// CHECK3-NEXT: store i8* [[TMP29]], i8** [[TMP28]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = bitcast [5 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP31]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, 
i32, i32, [1000 x i32]*, i32)* @__omp_outlined__1 to i8*), i8* null, i8** [[TMP32]], i32 5) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP39]], [[TMP40]] +// CHECK3-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] +// CHECK3: cond.true11: +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK3-NEXT: br label [[COND_END13:%.*]] +// CHECK3: cond.false12: +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END13]] +// CHECK3: cond.end13: +// CHECK3-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP41]], [[COND_TRUE11]] ], [ [[TMP42]], [[COND_FALSE12]] ] +// CHECK3-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP43]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP45:%.*]] = load i32, i32* [[TMP44]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP45]]) +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP47:%.*]] = icmp ne i32 [[TMP46]], 0 +// CHECK3-NEXT: br i1 [[TMP47]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB15:%.*]] = sub nsw i32 [[TMP48]], 0 +// CHECK3-NEXT: [[DIV16:%.*]] = sdiv i32 [[SUB15]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV16]], 1 +// CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD17]], i32* [[I4]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP50:%.*]] = icmp ne i32 [[TMP49]], 0 +// CHECK3-NEXT: br i1 [[TMP50]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3: .omp.lastprivate.then: +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP51]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK3: .omp.lastprivate.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[L1]]) +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__2 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) 
#[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] -// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: br label [[COND_END:%.*]] -// CHECK12: cond.false: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] -// CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* -// CHECK12-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* -// CHECK12-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* -// CHECK12-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP25:%.*]] = bitcast [1000 x i16]* [[TMP0]] to i8* -// CHECK12-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = 
bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] -// CHECK12-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] -// CHECK12: cond.true10: -// CHECK12-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: br label [[COND_END12:%.*]] -// CHECK12: cond.false11: -// CHECK12-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END12]] -// CHECK12: cond.end12: -// CHECK12-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] -// CHECK12-NEXT: store 
i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) -// CHECK12-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 -// CHECK12-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 -// CHECK12-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 -// CHECK12-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i32]* nonnull align 4 dereferenceable(4000) [[A:%.*]], i32 [[L:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca [1000 x i32]*, align 4 +// CHECK3-NEXT: [[L_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i32]* [[A]], [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[L]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i32]*, [1000 x i32]** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], 
align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 32) +// CHECK3-NEXT: br label [[OMP_DISPATCH_COND:%.*]] +// CHECK3: omp.dispatch.cond: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ugt i32 [[TMP9]], [[TMP10]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ [[TMP11]], [[COND_TRUE]] ], [ [[TMP12]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* 
[[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]] +// CHECK3: omp.dispatch.body: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i32], [1000 x i32]* [[TMP0]], i32 0, i32 [[TMP19]] +// CHECK3-NEXT: store i32 1, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], 1 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3: omp.inner.for.end: +// 
CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] +// CHECK3: omp.dispatch.inc: +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: br label [[OMP_DISPATCH_COND]] +// CHECK3: omp.dispatch.end: +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP27]]) +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp ne i32 [[TMP28]], 0 +// CHECK3-NEXT: br i1 [[TMP29]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB10:%.*]] = sub nsw i32 [[TMP30]], 0 +// CHECK3-NEXT: [[DIV11:%.*]] = sdiv i32 [[SUB10]], 1 +// CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i32 [[DIV11]], 1 +// CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 0, [[MUL12]] +// CHECK3-NEXT: store i32 [[ADD13]], i32* [[I3]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = icmp ne i32 [[TMP31]], 0 +// CHECK3-NEXT: br i1 [[TMP32]], label [[DOTOMP_LASTPRIVATE_THEN:%.*]], label [[DOTOMP_LASTPRIVATE_DONE:%.*]] +// CHECK3: .omp.lastprivate.then: +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[L_ADDR]], 
align 4 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[L_ADDR]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_LASTPRIVATE_DONE]] +// CHECK3: .omp.lastprivate.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I3:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: 
store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 -// CHECK12-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 -// CHECK12-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] -// CHECK12: omp.precond.then: -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 -// CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: 
store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] -// CHECK12-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK12-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 -// CHECK12-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 -// CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 -// CHECK12-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK12-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 -// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK12: omp.body.continue: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[TMP17]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK12-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK12-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 -// CHECK12-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 -// CHECK12-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 -// CHECK12-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] -// CHECK12-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: br label [[OMP_PRECOND_END]] -// CHECK12: omp.precond.end: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l32 +// CHECK3-SAME: (i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: 
+// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__2(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32 [[TMP3]], [1000 x i16]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l49 -// CHECK12-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK12: .execute: -// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK12-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] -// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK12: .omp.deinit: -// 
CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK12-NEXT: br label [[DOTEXIT:%.*]] -// CHECK12: .exit: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__2 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// 
CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP7]], [[TMP8]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 
[[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], 1 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp slt i32 [[TMP12]], [[ADD]] +// CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP16]], i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_CASTED]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP19:%.*]] = inttoptr i32 [[TMP14]] to i8* +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[TMP18]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = inttoptr i32 [[TMP15]] to i8* +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[TMP20]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP23:%.*]] = inttoptr i32 [[TMP17]] to i8* +// CHECK3-NEXT: store i8* [[TMP23]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast [1000 x 
i16]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP25]], i8** [[TMP24]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP27]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, i32, [1000 x i16]*)* @__omp_outlined__3 to i8*), i8* null, i8** [[TMP28]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP29]], [[TMP30]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP31]], [[TMP32]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: [[CMP9:%.*]] = icmp sgt i32 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: br i1 [[CMP9]], label [[COND_TRUE10:%.*]], label [[COND_FALSE11:%.*]] +// CHECK3: cond.true10: +// CHECK3-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: br label [[COND_END12:%.*]] +// CHECK3: cond.false11: +// CHECK3-NEXT: [[TMP38:%.*]] 
= load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END12]] +// CHECK3: cond.end12: +// CHECK3-NEXT: [[COND13:%.*]] = phi i32 [ [[TMP37]], [[COND_TRUE10]] ], [ [[TMP38]], [[COND_FALSE11]] ] +// CHECK3-NEXT: store i32 [[COND13]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP39]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32, i32* [[TMP40]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]]) +// CHECK3-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP43:%.*]] = icmp ne i32 [[TMP42]], 0 +// CHECK3-NEXT: br i1 [[TMP43]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB14:%.*]] = sub nsw i32 [[TMP44]], 0 +// CHECK3-NEXT: [[DIV15:%.*]] = sdiv i32 [[SUB14]], 1 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV15]], 1 +// CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD16]], i32* [[I3]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__4 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: 
[[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 -// CHECK12-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: br label [[COND_END:%.*]] -// CHECK12: cond.false: -// CHECK12-NEXT: 
[[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 -// CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK12-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK12-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* -// CHECK12-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) -// CHECK12-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 -// CHECK12-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] -// CHECK12: cond.true5: -// CHECK12-NEXT: br label [[COND_END7:%.*]] -// CHECK12: cond.false6: -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END7]] -// CHECK12: cond.end7: -// CHECK12-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] -// CHECK12-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK12-NEXT: 
[[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 -// CHECK12-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], i32 [[N:%.*]], [1000 x i16]* nonnull align 2 dereferenceable(2000) [[AA:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[AA_ADDR:%.*]] = alloca [1000 x i16]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCAPTURE_EXPR_1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I3:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* 
[[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store [1000 x i16]* [[AA]], [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [1000 x i16]*, [1000 x i16]** [[AA_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[N_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP2]], 0 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[SUB]], 1 +// CHECK3-NEXT: [[SUB2:%.*]] = sub nsw i32 [[DIV]], 1 +// CHECK3-NEXT: store i32 [[SUB2]], i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp slt i32 0, [[TMP3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]] +// CHECK3: omp.precond.then: +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 +// CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP8]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// 
CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]] +// CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [1000 x i16], [1000 x i16]* [[TMP0]], i32 0, i32 [[TMP13]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i16, i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP14]] to i32 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV]], 1 +// CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX]], align 2 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[TMP17]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP18]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 +// CHECK3-NEXT: [[SUB8:%.*]] = sub nsw i32 [[TMP21]], 0 +// CHECK3-NEXT: [[DIV9:%.*]] = sdiv i32 [[SUB8]], 1 +// CHECK3-NEXT: [[MUL10:%.*]] = mul nsw i32 [[DIV9]], 1 +// CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 0, [[MUL10]] +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[I3]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: br label [[OMP_PRECOND_END]] +// CHECK3: omp.precond.end: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__5 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = 
alloca i32, align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: 
omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 -// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK12: omp.body.continue: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 -// CHECK12-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l37 +// CHECK3-SAME: ([10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: 
entry: +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__4(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x i32]* [[TMP0]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__4 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: 
[[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [3 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 9 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 9, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP1:%.*]] = icmp slt i32 [[TMP6]], 10 +// CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast [10 x i32]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast [3 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @__omp_outlined__5 to i8*), i8* null, i8** [[TMP15]], i32 3) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP18]], 
[[TMP19]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP4:%.*]] = icmp sgt i32 [[TMP22]], 9 +// CHECK3-NEXT: br i1 [[CMP4]], label [[COND_TRUE5:%.*]], label [[COND_FALSE6:%.*]] +// CHECK3: cond.true5: +// CHECK3-NEXT: br label [[COND_END7:%.*]] +// CHECK3: cond.false6: +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END7]] +// CHECK3: cond.end7: +// CHECK3-NEXT: [[COND8:%.*]] = phi i32 [ 9, [[COND_TRUE5]] ], [ [[TMP23]], [[COND_FALSE6]] ] +// CHECK3-NEXT: store i32 [[COND8]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = icmp ne i32 [[TMP25]], 0 +// CHECK3-NEXT: br i1 [[TMP26]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__5 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 
[[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x i32]* nonnull align 4 dereferenceable(40) [[B:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca [10 x i32]*, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store [10 x i32]* [[B]], [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x i32]*, [10 x i32]** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 9, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// 
CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP10]], 1 +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3: omp.inner.for.end: +// 
CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = icmp ne i32 [[TMP13]], 0 +// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l54 -// CHECK12-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) -// CHECK12-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK12: .execute: -// CHECK12-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// 
CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK12-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] -// CHECK12-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK12: .omp.deinit: -// CHECK12-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) -// CHECK12-NEXT: br label [[DOTEXIT:%.*]] -// CHECK12: .exit: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIiET_i_l42 +// CHECK3-SAME: ([10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 0) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__6(i32* [[DOTTHREADID_TEMP_]], i32* 
[[DOTZERO_ADDR]], [10 x [10 x i32]]* [[TMP0]], i32 [[TMP3]]) #[[ATTR2]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 0) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__6 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32 0, i32* 
[[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK12-NEXT: [[TMP1:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) -// CHECK12-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 -// CHECK12-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK12: cond.true: -// CHECK12-NEXT: br label [[COND_END:%.*]] -// CHECK12: cond.false: -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END]] -// CHECK12: cond.end: -// CHECK12-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] -// CHECK12-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 -// CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, 
i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* -// CHECK12-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* -// CHECK12-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* -// CHECK12-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 -// CHECK12-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* -// CHECK12-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 -// CHECK12-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK12-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// 
CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 -// CHECK12-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] -// CHECK12: cond.true6: -// CHECK12-NEXT: br label [[COND_END8:%.*]] -// CHECK12: cond.false7: -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: br label [[COND_END8]] -// CHECK12: cond.end8: -// CHECK12-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] -// CHECK12-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK12-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) -// CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 -// CHECK12-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK12-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define 
{{[^@]+}}@__omp_outlined__6 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_COMB_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[F_CASTED:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [4 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[TMP1:%.*]] = load i32*, i32** 
[[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]], i32 91, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_COMB_LB]], i32* [[DOTOMP_COMB_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 [[NVPTX_NUM_THREADS]]) +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP3]], 99 +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[TMP4]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i32 [[COND]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP6]], 100 +// CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP9]], i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[F_CASTED]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = inttoptr i32 [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP12]], i8** 
[[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = inttoptr i32 [[TMP8]] to i8* +// CHECK3-NEXT: store i8* [[TMP14]], i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 2 +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast [10 x [10 x i32]]* [[TMP0]] to i8* +// CHECK3-NEXT: store i8* [[TMP16]], i8** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = getelementptr inbounds [4 x i8*], [4 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 3 +// CHECK3-NEXT: [[TMP18:%.*]] = inttoptr i32 [[TMP10]] to i8* +// CHECK3-NEXT: store i8* [[TMP18]], i8** [[TMP17]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = bitcast [4 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP2]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32, i32, [10 x [10 x i32]]*, i32)* @__omp_outlined__7 to i8*), i8* null, i8** [[TMP19]], i32 4) +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[CMP5:%.*]] = icmp sgt i32 [[TMP26]], 99 +// CHECK3-NEXT: br i1 [[CMP5]], label [[COND_TRUE6:%.*]], label [[COND_FALSE7:%.*]] +// CHECK3: cond.true6: +// CHECK3-NEXT: br label [[COND_END8:%.*]] +// CHECK3: cond.false7: +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: br label [[COND_END8]] +// CHECK3: cond.end8: +// CHECK3-NEXT: [[COND9:%.*]] = phi i32 [ 99, [[COND_TRUE6]] ], [ [[TMP27]], [[COND_FALSE7]] ] +// CHECK3-NEXT: store i32 [[COND9]], i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP28]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 +// CHECK3-NEXT: br i1 [[TMP30]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK3-NEXT: br label [[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: ret void // // -// CHECK12-LABEL: define {{[^@]+}}@__omp_outlined__7 -// CHECK12-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 [[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { -// CHECK12-NEXT: entry: -// CHECK12-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK12-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// 
CHECK12-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 -// CHECK12-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[TMP:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[K:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[I:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: [[J:%.*]] = alloca i32, align 4 -// CHECK12-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 -// CHECK12-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 -// CHECK12-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP3:%.*]] = load i32*, 
i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK12-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 -// CHECK12-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 -// CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] -// CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 -// CHECK12-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] -// CHECK12-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] -// CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 -// CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 -// CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 -// CHECK12-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 -// CHECK12-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] -// CHECK12-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 -// CHECK12-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] -// CHECK12-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 -// CHECK12-NEXT: store i32 10, i32* [[K]], align 4 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 -// CHECK12-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] -// 
CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 -// CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 -// CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 -// CHECK12-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 -// CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] -// CHECK12: omp.body.continue: -// CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] -// CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 -// CHECK12-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] -// CHECK12-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] -// CHECK12: omp.inner.for.end: -// CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] -// CHECK12: omp.loop.exit: -// CHECK12-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 -// CHECK12-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 -// CHECK12-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] -// CHECK12: .omp.final.then: -// CHECK12-NEXT: store i32 10, i32* [[I]], align 4 -// CHECK12-NEXT: store i32 10, i32* [[J]], align 4 -// CHECK12-NEXT: br label [[DOTOMP_FINAL_DONE]] -// CHECK12: .omp.final.done: -// CHECK12-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__7 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32 
[[DOTPREVIOUS_LB_:%.*]], i32 [[DOTPREVIOUS_UB_:%.*]], [10 x [10 x i32]]* nonnull align 4 dereferenceable(400) [[C:%.*]], i32 [[F:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_LB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTPREVIOUS_UB__ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca [10 x [10 x i32]]*, align 4 +// CHECK3-NEXT: [[F_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IV:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[_TMP1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_LB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_UB:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[K:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[I:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[J:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_LB_]], i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: store i32 [[DOTPREVIOUS_UB_]], i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: store [10 x [10 x i32]]* [[C]], [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[F]], i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load [10 x [10 x i32]]*, [10 x [10 x i32]]** [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 99, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i32, i32* [[DOTPREVIOUS_LB__ADDR]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: 
store i32 [[TMP1]], i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4 +// CHECK3-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB2]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4 +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] +// CHECK3: omp.inner.for.cond: +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTPREVIOUS_UB__ADDR]], align 4 +// CHECK3-NEXT: [[CMP:%.*]] = icmp ule i32 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] +// CHECK3: omp.inner.for.body: +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP8]], 10 +// CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 +// CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP10]], 10 +// CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 10 +// CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP9]], [[MUL3]] +// CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 +// CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 0, [[MUL4]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[J]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[K]], align 4 +// CHECK3-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[F_ADDR]], align 4 +// CHECK3-NEXT: [[MUL6:%.*]] = mul nsw i32 [[TMP12]], [[TMP13]] +// CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], [[MUL6]] +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[K]], align 4 +// CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[ADD7]], [[TMP14]] +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x [10 x i32]], [10 x [10 x i32]]* [[TMP0]], i32 0, i32 [[TMP15]] +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[J]], align 4 +// CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[ARRAYIDX]], i32 0, i32 [[TMP16]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] +// CHECK3: omp.body.continue: +// CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] +// CHECK3: omp.inner.for.inc: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3: omp.inner.for.end: +// CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] +// CHECK3: omp.loop.exit: +// CHECK3-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP4]]) +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IS_LAST]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = icmp ne i32 [[TMP19]], 0 +// CHECK3-NEXT: br i1 [[TMP20]], label [[DOTOMP_FINAL_THEN:%.*]], label [[DOTOMP_FINAL_DONE:%.*]] +// CHECK3: .omp.final.then: +// CHECK3-NEXT: store i32 10, i32* [[I]], align 4 +// CHECK3-NEXT: store i32 10, i32* [[J]], align 4 +// CHECK3-NEXT: br label 
[[DOTOMP_FINAL_DONE]] +// CHECK3: .omp.final.done: +// CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_teams_codegen.cpp b/clang/test/OpenMP/nvptx_teams_codegen.cpp index c53ce6119d210..78f65064a9b9f 100644 --- a/clang/test/OpenMP/nvptx_teams_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_codegen.cpp @@ -2,10 +2,8 @@ // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK4 +// RUN: %clang_cc1 -DCK1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 // expected-no-diagnostics #ifndef HEADER #define 
HEADER @@ -42,11 +40,9 @@ int main (int argc, char **argv) { // Test target codegen - host bc file has to be created first. // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK3 // RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK7 -// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK8 +// RUN: %clang_cc1 -DCK2 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 // expected-no-diagnostics #ifdef CK2 @@ -80,828 +76,6 @@ int main (int argc, char **argv) 
{ #endif // CK2 #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker -// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 -// CHECK1-SAME: (i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() 
-// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK1-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: store i32 [[TMP10]], i32* [[ARGC7]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// 
CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker -// CHECK1-SAME: () #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br 
i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17 -// CHECK1-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label 
[[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size2", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* 
@"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: store i8** [[TMP10]], i8*** [[ARGC7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: 
[[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK1-NEXT: store i8** null, i8*** [[TMP0]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker -// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br 
label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 -// CHECK2-SAME: (i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// 
CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV]], align 8 -// CHECK2-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store i32 [[TMP7]], i32* [[ARGC7]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 -// 
CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker -// CHECK2-SAME: () #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .terminate.parallel: -// 
CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17 -// CHECK2-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = 
and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK2-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store i8** [[TMP7]], i8*** [[ARGC7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: 
entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 -// CHECK2-NEXT: store i8** null, i8*** [[TMP0]], align 8 -// CHECK2-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker -// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK3: .await.work: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK3: .select.workers: -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK3: .execute.parallel: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// 
CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK3: .terminate.parallel: -// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK3: .barrier.parallel: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 -// CHECK3-SAME: (i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .mastercheck: -// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: 
[[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK3: .master: -// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 -// CHECK3-NEXT: store i32 [[TMP10]], i32* [[ARGC7]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = 
call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK3: .termination.notifier: -// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker -// CHECK3-SAME: () #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: br 
label [[DOTAWAIT_WORK:%.*]] -// CHECK3: .await.work: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK3: .select.workers: -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK3: .execute.parallel: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK3: .terminate.parallel: -// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK3: .barrier.parallel: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17 -// CHECK3-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// 
CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker() #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .mastercheck: -// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK3: .master: -// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size2", align 4 -// 
CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 -// CHECK3-NEXT: store i8** [[TMP10]], i8*** [[ARGC7]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK3: .termination.notifier: -// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: 
entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK3-NEXT: store i8** null, i8*** [[TMP0]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker -// CHECK4-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK4: .await.work: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK4: .select.workers: -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK4: .execute.parallel: -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// 
CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK4-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK4: .terminate.parallel: -// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK4: .barrier.parallel: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25 -// CHECK4-SAME: (i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK4: .worker: -// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l25_worker() #[[ATTR3:[0-9]+]] -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .mastercheck: -// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: 
[[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK4: .master: -// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: store i32 [[TMP7]], i32* [[ARGC7]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK4-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK4: .termination.notifier: -// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTEXIT]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define 
{{[^@]+}}@__omp_outlined__ -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker -// CHECK4-SAME: () #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK4: .await.work: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK4: .select.workers: -// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label 
[[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK4: .execute.parallel: -// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK4-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK4: .terminate.parallel: -// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK4: .barrier.parallel: -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17 -// CHECK4-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 -// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK4: .worker: -// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l17_worker() #[[ATTR3]] -// CHECK4-NEXT: br label [[DOTEXIT:%.*]] -// CHECK4: .mastercheck: -// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK4: .master: -// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK4-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK4-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK4-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: store i8** [[TMP7]], i8*** [[ARGC7]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK4-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK4: .termination.notifier: -// CHECK4-NEXT: call 
void @__kmpc_kernel_deinit(i16 1) -// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK4-NEXT: br label [[DOTEXIT]] -// CHECK4: .exit: -// CHECK4-NEXT: ret void -// -// -// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 -// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK4-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK4-NEXT: store i8** null, i8*** [[TMP0]], align 4 -// CHECK4-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker // CHECK5-SAME: () #[[ATTR0:[0-9]+]] { // CHECK5-NEXT: entry: @@ -935,8 +109,6 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[DOTAWAIT_WORK]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 // CHECK5-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK5-NEXT: entry: @@ -998,8 +170,6 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[DOTEXIT]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: @@ -1012,8 +182,6 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP0:%.*]] 
= load i32*, i32** [[ARGC_ADDR]], align 8 // CHECK5-NEXT: store i32 0, i32* [[TMP0]], align 4 // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker // CHECK5-SAME: () #[[ATTR0]] { // CHECK5-NEXT: entry: @@ -1047,8 +215,6 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[DOTAWAIT_WORK]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 // CHECK5-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: @@ -1109,8 +275,6 @@ int main (int argc, char **argv) { // CHECK5-NEXT: br label [[DOTEXIT]] // CHECK5: .exit: // CHECK5-NEXT: ret void -// -// // CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { // CHECK5-NEXT: entry: @@ -1123,643 +287,1411 @@ int main (int argc, char **argv) { // CHECK5-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 // CHECK5-NEXT: store i8** null, i8*** [[TMP0]], align 8 // CHECK5-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker +// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK6: .await.work: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: 
[[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK6: .select.workers: +// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK6: .execute.parallel: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK6: .terminate.parallel: +// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK6: .barrier.parallel: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 +// CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK6-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* 
[[B_ADDR]] to i32* +// CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK6: .worker: +// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .mastercheck: +// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 +// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 +// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] +// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK6: .master: +// CHECK6-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] +// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) +// CHECK6-NEXT: 
[[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK6-NEXT: [[ARGC9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: store i32 [[TMP7]], i32* [[ARGC9]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC9]]) #[[ATTR3]] +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK6: .termination.notifier: +// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTEXIT]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker +// CHECK6-SAME: () #[[ATTR0]] { +// CHECK6-NEXT: 
entry: +// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK6: .await.work: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK6: .select.workers: +// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK6: .execute.parallel: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK6: .terminate.parallel: +// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK6: .barrier.parallel: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 +// CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[A_ADDR:%.*]] = 
alloca i64, align 8 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK6-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK6: .worker: +// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .mastercheck: +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label 
[[DOTEXIT]] +// CHECK6: .master: +// CHECK6-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK6-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: [[ARGC8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: store i8** [[TMP7]], i8*** [[ARGC8]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC8]]) #[[ATTR3]] +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK6: .termination.notifier: +// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTEXIT]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK6-NEXT: [[ARGC_ADDR:%.*]] 
= alloca i8***, align 8 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK6-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 +// CHECK6-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK6-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker +// CHECK7-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK7-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK7: .await.work: +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK7: .select.workers: +// CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK7: .execute.parallel: +// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// 
CHECK7: .terminate.parallel: +// CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK7: .barrier.parallel: +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK7: .exit: +// CHECK7-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 +// CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK7: .worker: +// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] +// CHECK7-NEXT: br label [[DOTEXIT:%.*]] +// CHECK7: .mastercheck: +// CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK7: .master: +// CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK7-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 +// CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) +// CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 +// CHECK7-NEXT: store i32 [[TMP10]], 
i32* [[ARGC7]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 +// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) +// CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK7: .termination.notifier: +// CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: br label [[DOTEXIT]] +// CHECK7: .exit: +// CHECK7-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK7-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker +// CHECK7-SAME: () #[[ATTR0]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK7-NEXT: store i8 0, i8* 
[[EXEC_STATUS]], align 1 +// CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK7: .await.work: +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK7: .select.workers: +// CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK7: .execute.parallel: +// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK7: .terminate.parallel: +// CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK7: .barrier.parallel: +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK7: .exit: +// CHECK7-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 +// CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: 
[[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK7: .worker: +// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] +// CHECK7-NEXT: br label [[DOTEXIT:%.*]] +// CHECK7: .mastercheck: +// CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK7: .master: +// CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK7-NEXT: call void 
@__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size2", align 4 +// CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) +// CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 +// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* +// CHECK7-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 +// CHECK7-NEXT: store i8** [[TMP10]], i8*** [[ARGC7]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK7-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] +// CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 +// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) +// CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK7: .termination.notifier: +// CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK7-NEXT: br label 
[[DOTEXIT]] +// CHECK7: .exit: +// CHECK7-NEXT: ret void +// CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__3 +// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK7-NEXT: entry: +// CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK7-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 +// CHECK7-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK7-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker +// CHECK8-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK8: .await.work: +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK8: .select.workers: +// CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: 
[[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK8: .execute.parallel: +// CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK8-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK8: .terminate.parallel: +// CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK8: .barrier.parallel: +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK8: .exit: +// CHECK8-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 +// CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK8-NEXT: br i1 
[[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK8: .worker: +// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] +// CHECK8-NEXT: br label [[DOTEXIT:%.*]] +// CHECK8: .mastercheck: +// CHECK8-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK8: .master: +// CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK8-NEXT: store i32 [[TMP7]], i32* [[ARGC7]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK8-NEXT: 
call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] +// CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK8-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK8: .termination.notifier: +// CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: br label [[DOTEXIT]] +// CHECK8: .exit: +// CHECK8-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK8-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK8-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker +// CHECK8-SAME: () #[[ATTR0]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK8: .await.work: +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 
+// CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK8: .select.workers: +// CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK8-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK8: .execute.parallel: +// CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK8-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK8: .terminate.parallel: +// CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK8: .barrier.parallel: +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK8: .exit: +// CHECK8-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 +// CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK8-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: 
[[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK8-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK8: .worker: +// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] +// CHECK8-NEXT: br label [[DOTEXIT:%.*]] +// CHECK8: .mastercheck: +// CHECK8-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK8: .master: +// CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) +// CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK8-NEXT: 
[[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK8-NEXT: store i8** [[TMP7]], i8*** [[ARGC7]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK8-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] +// CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK8-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK8: .termination.notifier: +// CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK8-NEXT: br label [[DOTEXIT]] +// CHECK8: .exit: +// CHECK8-NEXT: ret void +// CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK8-NEXT: entry: +// CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK8-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 +// CHECK8-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK8-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// 
CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker -// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: store 
i8* null, i8** [[WORK_FN]], align 8 -// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK6: .await.work: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK6: .select.workers: -// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK6: .execute.parallel: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK6: .terminate.parallel: -// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK6: .barrier.parallel: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 +// CHECK1-SAME: (i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 
0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: 
[[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i32* +// CHECK1-NEXT: store i32 [[TMP5]], i32* [[ARGC_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 -// CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK6-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: 
[[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK6: .worker: -// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .mastercheck: -// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 -// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], 1 -// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] -// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK6: .master: -// CHECK6-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] -// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 4, i16 1) -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[CONV2]], align 8 -// CHECK6-NEXT: [[ARGC9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], 
i32 0, i32 0 -// CHECK6-NEXT: store i32 [[TMP7]], i32* [[ARGC9]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC9]]) #[[ATTR3]] -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK6: .termination.notifier: -// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTEXIT]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK1-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, 
align 8 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_worker +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .terminate.parallel: 
+// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker -// CHECK6-SAME: () #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK6: .await.work: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK6: .select.workers: -// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK6: .execute.parallel: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK6: .terminate.parallel: -// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK6-NEXT: br 
label [[DOTBARRIER_PARALLEL]] -// CHECK6: .barrier.parallel: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 +// CHECK1-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 
[[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i8*** +// CHECK1-NEXT: store i8** [[TMP5]], i8*** [[ARGC_ON_STACK]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 -// CHECK6-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 8 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], 
align 4 -// CHECK6-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK6-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK6-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK6: .worker: -// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .mastercheck: -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] -// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK6: .master: -// CHECK6-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] 
-// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK6-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: [[ARGC8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: store i8** [[TMP7]], i8*** [[ARGC8]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC8]]) #[[ATTR3]] -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK6: .termination.notifier: -// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTEXIT]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8***, 
i8**** [[ARGC_ADDR]], align 8 +// CHECK1-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK1-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// 
CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23 +// CHECK2-SAME: (i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l23_worker() #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i32* +// CHECK2-NEXT: store i32 [[TMP5]], i32* [[ARGC_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK6-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK6-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 -// CHECK6-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], 
align 8 -// CHECK6-NEXT: store i8** null, i8*** [[TMP0]], align 8 -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker -// CHECK7-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK7-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK7: .await.work: -// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK7: .select.workers: -// CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 
-// CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK7: .execute.parallel: -// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK7: .terminate.parallel: -// CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK7: .barrier.parallel: -// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK7: .exit: -// CHECK7-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_worker +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: 
br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 -// CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] 
-// CHECK7: .worker: -// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] -// CHECK7-NEXT: br label [[DOTEXIT:%.*]] -// CHECK7: .mastercheck: -// CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK7: .master: -// CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK7-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size", align 4 -// CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// 
CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 -// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 -// CHECK7-NEXT: store i32 [[TMP10]], i32* [[ARGC7]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK7-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK7: .termination.notifier: -// CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: br label [[DOTEXIT]] -// CHECK7: .exit: -// CHECK7-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15 +// CHECK2-SAME: (i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: 
[[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l15_worker() #[[ATTR3]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i8*** +// CHECK2-NEXT: store i8** [[TMP5]], i8*** [[ARGC_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK7-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8***, 
i8**** [[ARGC_ADDR]], align 4 +// CHECK2-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker -// CHECK7-SAME: () #[[ATTR0]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK7-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK7-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK7-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK7-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK7: .await.work: -// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK7-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK7-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK7-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK7-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK7-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK7: .select.workers: -// CHECK7-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK7-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK7-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK7: .execute.parallel: -// CHECK7-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK7-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK7-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK7-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK7: .terminate.parallel: -// CHECK7-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK7-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK7: .barrier.parallel: -// CHECK7-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: br label [[DOTAWAIT_WORK]] -// 
CHECK7: .exit: -// CHECK7-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_worker +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK3: .await.work: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK3: .select.workers: +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK3: .execute.parallel: +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK3: .terminate.parallel: +// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK3: .barrier.parallel: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// CHECK3-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i64 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK3-NEXT: store i64 [[ARGC]], i64* [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARGC_ADDR]] to i32* +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK3: .worker: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_worker() #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .mastercheck: +// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE5]], 1 +// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 
[[NVPTX_NUM_THREADS4]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID3]], [[MASTER_TID]] +// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK3: .master: +// CHECK3-NEXT: [[NVPTX_NUM_THREADS6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE7:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT8:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS6]], [[NVPTX_WARP_SIZE7]] +// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT8]], i16 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[CONV2]], align 8 +// CHECK3-NEXT: [[ARGC9:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC9]] to i32* +// CHECK3-NEXT: store i32 [[TMP5]], i32* [[ARGC_ON_STACK]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC9]]) +// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK3: .termination.notifier: +// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTEXIT]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 -// CHECK7-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 
-// CHECK7-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK7-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK7-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK7-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK7-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK7-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK7: .worker: -// CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] -// CHECK7-NEXT: br label [[DOTEXIT:%.*]] -// CHECK7: .mastercheck: -// CHECK7-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK7-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK7-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK7-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK7-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK7-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK7-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK7: .master: -// CHECK7-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK7-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK7-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw 
i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK7-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK7-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK7-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size2", align 4 -// CHECK7-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 -// CHECK7-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.0* -// CHECK7-NEXT: [[TMP10:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP9]], i32 0, i32 0 -// CHECK7-NEXT: store i8** [[TMP10]], i8*** [[ARGC7]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK7-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK7-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK7-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK7-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK7-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK7: .termination.notifier: -// CHECK7-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK7-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK7-NEXT: br label [[DOTEXIT]] -// CHECK7: .exit: -// CHECK7-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK3-NEXT: ret void // // -// CHECK7-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK7-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK7-NEXT: entry: -// CHECK7-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK7-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 -// CHECK7-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK7-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK7-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK7-NEXT: store i8** null, i8*** [[TMP0]], align 4 -// CHECK7-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_worker +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca 
i8*, align 8 +// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK3: .await.work: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK3: .select.workers: +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK3: .execute.parallel: +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK3: .terminate.parallel: +// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK3: .barrier.parallel: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker -// CHECK8-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// 
CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK8: .await.work: -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK8: .select.workers: -// CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK8: .execute.parallel: -// CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK8-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK8: .terminate.parallel: -// CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK8: .barrier.parallel: -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK8: .exit: -// CHECK8-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 +// CHECK3-SAME: (i64 [[A:%.*]], i64 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca 
i8**, align 8 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK3-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK3-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i32* +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK3: .worker: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_worker() #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .mastercheck: +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK3: .master: +// CHECK3-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[ARGC8:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK3-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC8]] to i8*** +// CHECK3-NEXT: store i8** [[TMP5]], i8*** [[ARGC_ON_STACK]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[ARGC8]]) +// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK3: .termination.notifier: +// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTEXIT]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68 -// CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK8-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 
-// CHECK8-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK8-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK8: .worker: -// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l68_worker() #[[ATTR3:[0-9]+]] -// CHECK8-NEXT: br label [[DOTEXIT:%.*]] -// CHECK8: .mastercheck: -// CHECK8-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK8: .master: -// CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK8-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK8-NEXT: store i32 [[TMP7]], i32* [[ARGC7]], align 4 -// CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK8-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC7]]) #[[ATTR3]] -// CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK8-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK8: .termination.notifier: -// CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: br label [[DOTEXIT]] -// CHECK8: .exit: -// CHECK8-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 8 dereferenceable(8) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK3-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 8 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK3-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 8 +// CHECK3-NEXT: store i8** null, i8*** [[TMP0]], align 8 +// CHECK3-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_worker +// CHECK4-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// 
CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK4: .await.work: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK4: .select.workers: +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK4: .execute.parallel: +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK4-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK4-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK4: .terminate.parallel: +// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK4: .barrier.parallel: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca 
i32*, align 4 -// CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 -// CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK8-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: store i32 0, i32* [[TMP0]], align 4 -// CHECK8-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64 +// CHECK4-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i32 [[ARGC:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ARGC]], i32* [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK4: .worker: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_main_l64_worker() #[[ATTR3:[0-9]+]] +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .mastercheck: +// CHECK4-NEXT: 
[[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK4: .master: +// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i32* +// CHECK4-NEXT: store i32 [[TMP5]], i32* [[ARGC_ON_STACK]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK4: .termination.notifier: +// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTEXIT]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// 
CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker -// CHECK8-SAME: () #[[ATTR0]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK8-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK8-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK8-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK8: .await.work: -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK8-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK8-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK8-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK8-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK8: .select.workers: -// CHECK8-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK8-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK8-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK8: .execute.parallel: -// CHECK8-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK8-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK8-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK8-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK8: .terminate.parallel: -// CHECK8-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK8-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK8: .barrier.parallel: -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK8: .exit: -// CHECK8-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK4-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[ARGC]], i32** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i32*, i32** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[TMP0]], align 4 +// CHECK4-NEXT: ret void // // -// CHECK8-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57 -// CHECK8-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 -// CHECK8-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK8-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK8-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK8-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK8-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK8-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK8-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// 
CHECK8: .worker: -// CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l57_worker() #[[ATTR3]] -// CHECK8-NEXT: br label [[DOTEXIT:%.*]] -// CHECK8: .mastercheck: -// CHECK8-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK8-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK8-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK8-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK8-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK8-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK8-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK8: .master: -// CHECK8-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK8-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK8-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK8-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK8-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK8-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 4, i16 1) -// CHECK8-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK8-NEXT: [[TMP7:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: [[ARGC7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK8-NEXT: store i8** [[TMP7]], i8*** [[ARGC7]], align 4 -// CHECK8-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK8-NEXT: call void @__omp_outlined__1(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC7]]) #[[ATTR3]] -// CHECK8-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK8-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK8: .termination.notifier: -// CHECK8-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK8-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK8-NEXT: br label [[DOTEXIT]] -// CHECK8: .exit: -// CHECK8-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_worker +// CHECK4-SAME: () #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK4-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK4: .await.work: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK4-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK4-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK4-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK4-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK4: .select.workers: +// CHECK4-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK4-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK4-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK4: .execute.parallel: +// CHECK4-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK4-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK4-NEXT: br 
label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK4: .terminate.parallel: +// CHECK4-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK4-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK4: .barrier.parallel: +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void // // -// CHECK8-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK8-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { -// CHECK8-NEXT: entry: -// CHECK8-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK8-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK8-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 -// CHECK8-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK8-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK8-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 -// CHECK8-NEXT: store i8** null, i8*** [[TMP0]], align 4 -// CHECK8-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53 +// CHECK4-SAME: (i32 [[A:%.*]], i32 [[B:%.*]], i8** [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8**, align 4 +// CHECK4-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK4-NEXT: store i8** [[ARGC]], i8*** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK4-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK4-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK4: .worker: +// CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z5tmainIPPcEiT__l53_worker() #[[ATTR3]] +// CHECK4-NEXT: br label [[DOTEXIT:%.*]] +// CHECK4: .mastercheck: +// CHECK4-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK4-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK4-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK4-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK4-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK4-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK4: .master: +// CHECK4-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK4-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK4-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK4-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK4-NEXT: [[TMP5:%.*]] = load i8**, i8*** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[ARGC7:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK4-NEXT: [[ARGC_ON_STACK:%.*]] = bitcast i8* [[ARGC7]] to i8*** +// CHECK4-NEXT: store i8** [[TMP5]], i8*** [[ARGC_ON_STACK]], align 4 +// 
CHECK4-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK4-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8*** [[ARGC_ON_STACK]]) #[[ATTR3]] +// CHECK4-NEXT: call void @__kmpc_free_shared(i8* [[ARGC7]]) +// CHECK4-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK4: .termination.notifier: +// CHECK4-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK4-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK4-NEXT: br label [[DOTEXIT]] +// CHECK4: .exit: +// CHECK4-NEXT: ret void +// +// +// CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8*** nonnull align 4 dereferenceable(4) [[ARGC:%.*]]) #[[ATTR1]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK4-NEXT: [[ARGC_ADDR:%.*]] = alloca i8***, align 4 +// CHECK4-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK4-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK4-NEXT: store i8*** [[ARGC]], i8**** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: [[TMP0:%.*]] = load i8***, i8**** [[ARGC_ADDR]], align 4 +// CHECK4-NEXT: store i8** null, i8*** [[TMP0]], align 4 +// CHECK4-NEXT: ret void // diff --git a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp index d0d64328a634e..f4f089f3a374e 100644 --- a/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp +++ b/clang/test/OpenMP/nvptx_teams_reduction_codegen.cpp @@ -2,12 +2,9 @@ // Test target codegen - host bc file has to be created first. 
// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - | FileCheck %s --check-prefix=CHECK1 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK2 // RUN: %clang_cc1 -verify -fopenmp -x c++ -triple i386-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm-bc %s -o %t-x86-host.bc -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -fopenmp-cuda-teams-reduction-recs-num=2048 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK4 -// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK5 -// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -fopenmp-cuda-teams-reduction-recs-num=2048 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - -fopenmp-cuda-parallel-target-regions | FileCheck %s --check-prefix=CHECK6 +// RUN: %clang_cc1 -verify -fopenmp -x c++ -triple nvptx-unknown-unknown 
-fopenmp-targets=nvptx-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK2 +// RUN: %clang_cc1 -verify -fopenmp -fexceptions -fcxx-exceptions -x c++ -triple nvptx-unknown-unknown -fopenmp-targets=nvptx-nvidia-cuda -fopenmp-cuda-teams-reduction-recs-num=2048 -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-x86-host.bc -o - | FileCheck %s --check-prefix=CHECK3 // expected-no-diagnostics #ifndef HEADER #define HEADER @@ -53,4354 +50,6 @@ int bar(int n){ } #endif -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker -// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* 
[[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 -// CHECK1-SAME: (i64 [[E:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty* -// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[CONV]], align 8 -// CHECK1-NEXT: [[E7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: store double [[TMP10]], 
double* [[E7]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP12]]) -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds i8, i8* [[TMP1]], i64 8 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty.0* -// CHECK1-NEXT: [[E1:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP3]], i32 0, i32 0 -// CHECK1-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = load double, double* [[E1]], align 8 -// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP4]], 5.000000e+00 -// CHECK1-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast double* [[E1]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP6]], i8* [[TMP10]], i32 1024, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 -// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = load double, double* [[E1]], align 8 -// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK1-NEXT: call void 
@__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* -// CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* 
[[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK1-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK1-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK1-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK1-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK1-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP38:%.*]] = 
icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: -// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 -// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 -// CHECK1-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* -// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* -// CHECK1-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 -// CHECK1-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: 
[[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND:%.*]] -// CHECK1: precond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK1: body: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: -// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 
0, i64 0 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: -// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK1-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 -// CHECK1-NEXT: br label [[PRECOND]] -// CHECK1: exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// 
CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 -// CHECK1-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define 
{{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 -// CHECK1-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK1-NEXT: 
store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker -// CHECK1-SAME: () #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK1: .await.work: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, 
i8** [[WORK_FN]], align 8 -// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK1: .select.workers: -// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK1: .execute.parallel: -// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK1: .terminate.parallel: -// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK1: .barrier.parallel: -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK1-SAME: (i64 [[C:%.*]], i64 [[D:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[D_ADDR]] to float* -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK1: .worker: -// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] -// CHECK1: .mastercheck: -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] -// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK1: .master: -// CHECK1-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] -// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK1-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* @"_openmp_static_kernel$size2", align 8 -// CHECK1-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds 
(%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i64 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.2* -// CHECK1-NEXT: [[TMP10:%.*]] = load i8, i8* [[CONV]], align 8 -// CHECK1-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP9]], i32 0, i32 1 -// CHECK1-NEXT: store i8 [[TMP10]], i8* [[C8]], align 4 -// CHECK1-NEXT: [[TMP11:%.*]] = load float, float* [[CONV1]], align 8 -// CHECK1-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP9]], i32 0, i32 0 -// CHECK1-NEXT: store float [[TMP11]], float* [[D9]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP13:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK1-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP13]]) -// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK1: .termination.notifier: -// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK1-NEXT: br label [[DOTEXIT]] -// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK1-SAME: (i32* 
noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i64 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK1-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: store i8 0, i8* [[C1]], align 4 -// CHECK1-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK1-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[D2]], align 
4 -// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 -// CHECK1-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: store i8* [[C1]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast float* [[D2]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP13]], i32 1024, i8* [[TMP12]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func8, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func9, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func10) -// CHECK1-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 -// CHECK1-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP16:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 
-// CHECK1-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[D2]], align 4 -// CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP18]], [[TMP19]] -// CHECK1-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 -// CHECK1-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) -// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK1-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* -// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i64 1 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 
-// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i64 1 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 8 -// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] -// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 -// CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] -// CHECK1-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] -// CHECK1-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] -// CHECK1-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] -// CHECK1-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] -// CHECK1-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK1: then6: -// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 -// CHECK1-NEXT: [[TMP53:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 8 -// CHECK1-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 -// CHECK1-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 -// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 8 -// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 8 -// CHECK1-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* -// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK1-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK1-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 -// CHECK1-NEXT: br label [[IFCONT8:%.*]] -// CHECK1: else7: -// CHECK1-NEXT: br label [[IFCONT8]] -// CHECK1: ifcont8: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// 
CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* -// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 -// CHECK1-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 -// CHECK1-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label 
[[IFCONT6]] -// CHECK1: ifcont6: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK1: then8: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 -// CHECK1-NEXT: br label [[IFCONT10:%.*]] -// CHECK1: else9: -// CHECK1-NEXT: br label [[IFCONT10]] -// CHECK1: ifcont10: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK1: then12: -// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK1-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK1-NEXT: br label [[IFCONT14:%.*]] -// CHECK1: else13: -// CHECK1-NEXT: br label [[IFCONT14]] -// CHECK1: ifcont14: -// 
CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func7 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: 
[[TMP16:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK1-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func8 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* 
[[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func9 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 -// CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 -// CHECK1-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func10 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x 
float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 -// CHECK1-SAME: (i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() -// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK1: .execute: -// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK1-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK1: .omp.deinit: -// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) -// CHECK1-NEXT: br label [[DOTEXIT:%.*]] 
-// CHECK1: .exit: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// 
CHECK1-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP8]], i64 2) -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func17, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func18, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func20, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func21, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func22) -// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK1-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK1-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: 
[[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK1-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK1-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__12 -// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: store i16* [[B]], i16** 
[[B_ADDR]], align 8 -// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK1-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK1: cond.true: -// CHECK1-NEXT: br label [[COND_END:%.*]] -// CHECK1: cond.false: -// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK1-NEXT: br label [[COND_END]] -// CHECK1: cond.end: -// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* 
@[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func14, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func15) -// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK1: .omp.reduction.then: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK1-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK1: cond.true9: -// CHECK1-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK1-NEXT: br label [[COND_END11:%.*]] -// CHECK1: cond.false10: -// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK1-NEXT: br label [[COND_END11]] -// CHECK1: cond.end11: -// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK1-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK1: .omp.reduction.done: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func14 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = 
alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr 
i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK1-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], 
[[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK1-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func13"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK1: then6: -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// 
CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK1-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK1-NEXT: br label [[IFCONT8:%.*]] -// CHECK1: else7: -// CHECK1-NEXT: br label [[IFCONT8]] -// CHECK1: ifcont8: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func15 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK1-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK1: then8: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 
[[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK1-NEXT: br label [[IFCONT10:%.*]] -// CHECK1: else9: -// CHECK1-NEXT: br label [[IFCONT10]] -// CHECK1: ifcont10: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK1: then12: -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK1-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK1-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK1-NEXT: br label [[IFCONT14:%.*]] -// CHECK1: else13: -// CHECK1-NEXT: br label [[IFCONT14]] -// CHECK1: ifcont14: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func17 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: 
[[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK1-NEXT: store 
i8* [[TMP20]], i8** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK1-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK1-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK1-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK1-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK1-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK1-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK1-NEXT: 
[[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK1-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK1-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK1-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK1-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK1: then6: -// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 -// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK1-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 -// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK1-NEXT: 
[[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK1-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK1-NEXT: br label [[IFCONT8:%.*]] -// CHECK1: else7: -// CHECK1-NEXT: br label [[IFCONT8]] -// CHECK1: ifcont8: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func18 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK1: then: -// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// 
CHECK1-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK1-NEXT: br label [[IFCONT:%.*]] -// CHECK1: else: -// CHECK1-NEXT: br label [[IFCONT]] -// CHECK1: ifcont: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK1: then4: -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK1-NEXT: br label [[IFCONT6:%.*]] -// CHECK1: else5: -// CHECK1-NEXT: br label [[IFCONT6]] -// CHECK1: ifcont6: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK1: then8: -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK1-NEXT: [[TMP21:%.*]] = load 
i16, i16* [[TMP18]], align 2 -// CHECK1-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK1-NEXT: br label [[IFCONT10:%.*]] -// CHECK1: else9: -// CHECK1-NEXT: br label [[IFCONT10]] -// CHECK1: ifcont10: -// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK1: then12: -// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK1-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK1-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK1-NEXT: br label [[IFCONT14:%.*]] -// CHECK1: else13: -// CHECK1-NEXT: br label [[IFCONT14]] -// CHECK1: ifcont14: -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func19 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// 
CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.5* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func20 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// 
CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.5* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func21 -// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, 
align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.5* -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK1-NEXT: ret void -// -// -// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func22 -// CHECK1-SAME: (i8* [[TMP0:%.*]], 
i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK1-NEXT: entry: -// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.5* -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK1-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] 
-// CHECK1-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker -// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 -// CHECK2-SAME: (i64 [[E:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* -// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[CONV]], align 8 -// CHECK2-NEXT: [[E7:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store double [[TMP7]], double* [[E7]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], 
align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK2-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty.0* -// CHECK2-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP2]], i32 0, i32 0 -// CHECK2-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load double, double* [[E1]], align 8 -// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 -// CHECK2-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast double* [[E1]] to i8* -// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 1024, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* 
@_omp_reduction_global_to_list_reduce_func) -// CHECK2-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK2-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[E1]], align 8 -// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) -// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// 
CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* -// CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK2-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 -// CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK2-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK2-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK2-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK2-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] 
-// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK2-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK2-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK2: then4: -// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 -// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 -// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* -// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* -// CHECK2-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 -// CHECK2-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 -// CHECK2-NEXT: br label [[IFCONT6:%.*]] -// CHECK2: else5: -// CHECK2-NEXT: br label [[IFCONT6]] -// CHECK2: ifcont6: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK2-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 -// CHECK2-NEXT: br label [[PRECOND:%.*]] -// CHECK2: precond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK2: body: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: 
br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK2: then4: -// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 -// CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 -// CHECK2-NEXT: br label [[IFCONT6:%.*]] -// CHECK2: else5: -// CHECK2-NEXT: br label [[IFCONT6]] -// CHECK2: ifcont6: -// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 -// CHECK2-NEXT: br label [[PRECOND]] -// CHECK2: exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast 
i8* [[TMP3]] to [1 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 -// CHECK2-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], 
%struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// 
CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 -// CHECK2-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker -// CHECK2-SAME: () #[[ATTR0]] { -// CHECK2-NEXT: entry: -// 
CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK2: .await.work: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 -// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK2: .select.workers: -// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK2: .execute.parallel: -// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK2: .terminate.parallel: -// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK2: .barrier.parallel: -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK2-SAME: (i64 [[C:%.*]], i64 [[D:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 -// 
CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[D_ADDR]] to float* -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK2: .worker: -// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .mastercheck: -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] -// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK2: .master: -// CHECK2-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: 
[[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] -// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK2-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.2* -// CHECK2-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 8 -// CHECK2-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: store i8 [[TMP7]], i8* [[C8]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load float, float* [[CONV1]], align 8 -// CHECK2-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: store float [[TMP8]], float* [[D9]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i32 [[TMP9]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK2: .termination.notifier: -// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK2-NEXT: br label [[DOTEXIT]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// 
CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 -// CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 -// CHECK2-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i64 8, i16 1) -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty.3* -// CHECK2-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP3]], i32 0, i32 1 -// CHECK2-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP3]], i32 0, i32 0 -// CHECK2-NEXT: store i8 0, i8* [[C1]], align 4 -// CHECK2-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK2-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK2-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 -// CHECK2-NEXT: [[TMP5:%.*]] = load float, float* [[D2]], align 4 -// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 3.300000e+01 -// CHECK2-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// 
CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: store i8* [[C1]], i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[D2]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i8* [[TMP12]], i32 1024, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK2-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 -// CHECK2-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK2-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[D2]], align 4 -// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP17]], [[TMP18]] -// CHECK2-NEXT: 
store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK2: .omp.reduction.done: -// CHECK2-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP2]]) -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load 
i8, i8* [[TMP10]], align 1 -// CHECK2-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) -// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK2-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK2-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* -// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i64 1 -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i64 1 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 -// CHECK2-NEXT: [[TMP33:%.*]] = bitcast float* 
[[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 8 -// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] -// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 -// CHECK2-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] -// CHECK2-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] -// CHECK2-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] -// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] -// CHECK2-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] -// CHECK2-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK2: then6: -// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 -// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 8 -// CHECK2-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 -// 
CHECK2-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 -// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 8 -// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 8 -// CHECK2-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* -// CHECK2-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK2-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK2-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 -// CHECK2-NEXT: br label [[IFCONT8:%.*]] -// CHECK2: else7: -// CHECK2-NEXT: br label [[IFCONT8]] -// CHECK2: ifcont8: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 
[[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* -// CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 -// CHECK2-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK2: then4: -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 -// CHECK2-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 -// CHECK2-NEXT: br label [[IFCONT6:%.*]] -// CHECK2: else5: -// CHECK2-NEXT: br label [[IFCONT6]] -// CHECK2: ifcont6: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// 
CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK2: then8: -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 -// CHECK2-NEXT: br label [[IFCONT10:%.*]] -// CHECK2: else9: -// CHECK2-NEXT: br label [[IFCONT10]] -// CHECK2: ifcont10: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK2: then12: -// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK2-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK2-NEXT: br label [[IFCONT14:%.*]] -// CHECK2: else13: -// CHECK2-NEXT: br label [[IFCONT14]] -// CHECK2: ifcont14: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// 
CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK2-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define 
{{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* 
[[TMP11]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 -// CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 
0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 -// CHECK2-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 -// CHECK2-NEXT: 
[[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 -// CHECK2-SAME: (i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 -// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK2-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 -// CHECK2-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 -// CHECK2-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* -// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* -// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK2-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() -// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK2: .execute: -// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK2: .omp.deinit: -// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) -// CHECK2-NEXT: br label [[DOTEXIT:%.*]] -// CHECK2: .exit: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 
-1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i64 2) -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK2-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK2-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: 
[[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK2-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK2: .omp.reduction.done: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 -// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 -// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 -// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 -// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 -// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 -// CHECK2-NEXT: store i32 0, i32* 
[[A1]], align 4 -// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK2-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK2: cond.true: -// CHECK2-NEXT: br label [[COND_END:%.*]] -// CHECK2: cond.false: -// CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK2-NEXT: br label [[COND_END]] -// CHECK2: cond.end: -// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK2-NEXT: 
[[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK2: .omp.reduction.then: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK2-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK2-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK2: cond.true9: -// CHECK2-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK2-NEXT: br label [[COND_END11:%.*]] -// CHECK2: cond.false10: -// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK2-NEXT: br label [[COND_END11]] -// CHECK2: cond.end11: -// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK2-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK2: .omp.reduction.done: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca 
[2 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 8 -// 
CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK2-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK2-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// 
CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK2-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK2: then6: -// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// CHECK2-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK2-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// 
CHECK2-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK2-NEXT: br label [[IFCONT8:%.*]] -// CHECK2: else7: -// CHECK2-NEXT: br label [[IFCONT8]] -// CHECK2: ifcont8: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP9]], i32 
addrspace(3)* [[TMP8]], align 4 -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK2: then4: -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK2-NEXT: br label [[IFCONT6:%.*]] -// CHECK2: else5: -// CHECK2-NEXT: br label [[IFCONT6]] -// CHECK2: ifcont6: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK2: then8: -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK2-NEXT: 
store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK2-NEXT: br label [[IFCONT10:%.*]] -// CHECK2: else9: -// CHECK2-NEXT: br label [[IFCONT10]] -// CHECK2: ifcont10: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK2: then12: -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK2-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK2-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK2-NEXT: br label [[IFCONT14:%.*]] -// CHECK2: else13: -// CHECK2-NEXT: br label [[IFCONT14]] -// CHECK2: ifcont14: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] 
= alloca i16, align 2 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** 
[[TMP21]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK2-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK2-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 -// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 -// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK2-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK2-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK2-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK2-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK2-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: 
-// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// CHECK2-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK2-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK2-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK2-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK2: then6: -// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 -// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 -// CHECK2-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK2-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 -// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 -// CHECK2-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK2-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK2-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK2-NEXT: br label [[IFCONT8:%.*]] -// CHECK2: else7: -// CHECK2-NEXT: br label 
[[IFCONT8]] -// CHECK2: ifcont8: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK2: then: -// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK2-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK2-NEXT: br label [[IFCONT:%.*]] -// CHECK2: else: -// CHECK2-NEXT: br label [[IFCONT]] -// CHECK2: ifcont: -// 
CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK2: then4: -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 -// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK2-NEXT: br label [[IFCONT6:%.*]] -// CHECK2: else5: -// CHECK2-NEXT: br label [[IFCONT6]] -// CHECK2: ifcont6: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK2: then8: -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK2-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK2-NEXT: br label [[IFCONT10:%.*]] -// CHECK2: else9: -// CHECK2-NEXT: br label 
[[IFCONT10]] -// CHECK2: ifcont10: -// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK2: then12: -// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK2-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK2-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK2-NEXT: br label [[IFCONT14:%.*]] -// CHECK2: else13: -// CHECK2-NEXT: br label [[IFCONT14]] -// CHECK2: ifcont14: -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to 
%struct._globalized_locals_ty.5* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// 
CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.5* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 
-// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.5* -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP6]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK2-NEXT: ret void -// -// -// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 -// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK2-NEXT: entry: -// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// 
CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 -// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 -// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 -// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.5* -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 -// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5:%.*]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 -// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_5]], %struct._globalized_locals_ty.5* [[TMP4]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 -// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK2-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker -// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { -// 
CHECK3-NEXT: entry: -// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK3: .await.work: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK3: .select.workers: -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK3: .execute.parallel: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK3: .terminate.parallel: -// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK3: .barrier.parallel: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 -// CHECK3-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK3-NEXT: 
entry: -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[E7:%.*]] = alloca double, align 8 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK3-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK3-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .mastercheck: -// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK3-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK3-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK3-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK3-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK3: .master: -// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: store double [[TMP7]], double* [[E7]], align 8 -// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK3: .termination.notifier: -// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* 
@"_openmp_static_kernel$size", align 4 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP2]], i16 [[TMP1]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds i8, i8* [[TMP3]], i32 0 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to %struct._globalized_locals_ty* -// CHECK3-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK3-NEXT: [[TMP6:%.*]] = load double, double* [[E1]], align 8 -// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP6]], 5.000000e+00 -// CHECK3-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast double* [[E1]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP12]], i32 1024, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, 
void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK3-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 -// CHECK3-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP15:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK3-NEXT: [[TMP16:%.*]] = load double, double* [[E1]], align 8 -// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP15]], [[TMP16]] -// CHECK3-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = 
load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* -// CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK3-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK3-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP28:%.*]] = and 
i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK3-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK3-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK3-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK3-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK3-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 -// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 -// CHECK3-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* -// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* -// CHECK3-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 -// CHECK3-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: -// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// 
CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: br label [[PRECOND:%.*]] -// CHECK3: precond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK3: body: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: -// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 -// CHECK3-NEXT: br label [[PRECOND]] -// CHECK3: exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, 
align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 -// CHECK3-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to 
%struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: 
[[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 -// CHECK3-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], 
align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker -// CHECK3-SAME: () #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK3: .await.work: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK3: .select.workers: -// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK3: .execute.parallel: -// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK3: .terminate.parallel: -// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK3: .barrier.parallel: -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label 
[[DOTAWAIT_WORK]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK3-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* -// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK3: .worker: -// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .mastercheck: -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK3-NEXT: [[MASTER_TID:%.*]] = 
and i32 [[TMP2]], [[TMP3]] -// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] -// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK3: .master: -// CHECK3-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] -// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK3-NEXT: [[TMP5:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* @"_openmp_static_kernel$size2", align 4 -// CHECK3-NEXT: call void @__kmpc_get_team_static_memory(i16 0, i8* addrspacecast (i8 addrspace(3)* getelementptr inbounds (%"union._shared_openmp_static_memory_type_$_", %"union._shared_openmp_static_memory_type_$_" addrspace(3)* @"_openmp_shared_static_glob_rd_$_", i32 0, i32 0, i32 0) to i8*), i32 [[TMP6]], i16 [[TMP5]], i8** addrspacecast (i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr" to i8**)) -// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds i8, i8* [[TMP7]], i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to %struct._globalized_locals_ty.1* -// CHECK3-NEXT: [[TMP10:%.*]] = load i8, i8* [[CONV]], align 4 -// CHECK3-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP9]], i32 0, i32 1 -// CHECK3-NEXT: store i8 [[TMP10]], i8* [[C8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK3-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP9]], i32 0, i32 0 -// CHECK3-NEXT: store float 
[[TMP11]], float* [[D9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__3(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP13:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared1", align 2 -// CHECK3-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP13]]) -// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK3: .termination.notifier: -// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK3-NEXT: br label [[DOTEXIT]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__3 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8* addrspace(3)* @"_openmp_kernel_static_glob_rd$ptr", align 4 -// 
CHECK3-NEXT: [[TMP3:%.*]] = getelementptr inbounds i8, i8* [[TMP2]], i32 8 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.2* -// CHECK3-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: store i8 0, i8* [[C1]], align 4 -// CHECK3-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP5]] to i32 -// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK3-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* [[D2]], align 4 -// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP6]], 3.300000e+01 -// CHECK3-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: store i8* [[C1]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast float* [[D2]] to i8* -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP8]], i8* [[TMP13]], i32 1024, i8* [[TMP12]], void (i8*, i16, i16, i16)* 
@_omp_reduction_shuffle_and_reduce_func5, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func6, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func8, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func9, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func10) -// CHECK3-NEXT: [[TMP15:%.*]] = icmp eq i32 [[TMP14]], 1 -// CHECK3-NEXT: br i1 [[TMP15]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP16:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP17]] to i32 -// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK3-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[D2]], align 4 -// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP18]], [[TMP19]] -// CHECK3-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP8]]) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: 
[[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 -// CHECK3-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) -// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK3-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP21:%.*]] 
= load i8*, i8** [[TMP20]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* -// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 -// CHECK3-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 -// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] -// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 -// CHECK3-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] -// CHECK3-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] -// CHECK3-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] -// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] -// CHECK3-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label 
[[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] -// CHECK3-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK3: then6: -// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 -// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 -// CHECK3-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 -// CHECK3-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 -// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 -// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* -// CHECK3-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK3-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK3-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 -// CHECK3-NEXT: br label [[IFCONT8:%.*]] -// CHECK3: else7: -// CHECK3-NEXT: br label [[IFCONT8]] -// CHECK3: ifcont8: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define 
{{[^@]+}}@_omp_reduction_inter_warp_copy_func6 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* -// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 -// CHECK3-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 
[[TMP2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 -// CHECK3-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: -// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK3-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK3: then12: -// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK3-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 -// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK3-NEXT: br label [[IFCONT14:%.*]] -// CHECK3: else13: -// CHECK3-NEXT: br label [[IFCONT14]] -// CHECK3: ifcont14: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func7 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load 
i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK3-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func8 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x 
i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func9 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to 
%struct._globalized_locals_ty.3* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 -// CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 -// CHECK3-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func10 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], 
align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 -// CHECK3-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 
-// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK3-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() -// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK3: .execute: -// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK3-NEXT: call void @__omp_outlined__11(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK3: .omp.deinit: -// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) -// CHECK3-NEXT: br label [[DOTEXIT:%.*]] -// CHECK3: .exit: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__11 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// 
CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__12 to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void 
(i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func17, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func18, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func20, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func21, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func22) -// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK3-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK3-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK3-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__12 -// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK3-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK3-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK3: cond.true: -// CHECK3-NEXT: br label [[COND_END:%.*]] -// CHECK3: cond.false: -// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK3-NEXT: br label [[COND_END]] -// CHECK3: cond.end: -// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK3-NEXT: store 
i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func14, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func15) -// CHECK3-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK3: .omp.reduction.then: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK3-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK3: cond.true9: -// CHECK3-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK3-NEXT: br label 
[[COND_END11:%.*]] -// CHECK3: cond.false10: -// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK3-NEXT: br label [[COND_END11]] -// CHECK3: cond.end11: -// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK3-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK3: .omp.reduction.done: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func14 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: 
[[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// 
CHECK3-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK3-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK3-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK3-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK3-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func13"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK3-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK3: then6: -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// 
CHECK3-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK3-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK3-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK3-NEXT: br label [[IFCONT8:%.*]] -// CHECK3: else7: -// CHECK3-NEXT: br label [[IFCONT8]] -// CHECK3: ifcont8: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func15 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// 
CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, i32 
addrspace(3)* [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: -// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK3: then12: -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8*, i8** 
[[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK3-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK3-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK3-NEXT: br label [[IFCONT14:%.*]] -// CHECK3: else13: -// CHECK3-NEXT: br label [[IFCONT14]] -// CHECK3: ifcont14: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func17 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: 
[[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK3-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK3-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i16, 
i16* [[TMP24]], i32 1 -// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK3-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 -// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK3-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK3-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK3-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK3-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK3-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK3-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK3-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK3-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK3: then6: -// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[TMP5]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK3-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK3-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK3-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK3-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK3-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK3-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK3-NEXT: br label [[IFCONT8:%.*]] -// CHECK3: else7: -// CHECK3-NEXT: br label [[IFCONT8]] -// CHECK3: ifcont8: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func18 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// 
CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK3: then: -// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK3-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK3-NEXT: br label [[IFCONT:%.*]] -// CHECK3: else: -// CHECK3-NEXT: br label [[IFCONT]] -// CHECK3: ifcont: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK3: then4: -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK3-NEXT: br label [[IFCONT6:%.*]] -// CHECK3: else5: 
-// CHECK3-NEXT: br label [[IFCONT6]] -// CHECK3: ifcont6: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK3: then8: -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK3-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK3-NEXT: br label [[IFCONT10:%.*]] -// CHECK3: else9: -// CHECK3-NEXT: br label [[IFCONT10]] -// CHECK3: ifcont10: -// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK3: then12: -// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK3-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* 
[[TMP24]], align 2 -// CHECK3-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK3-NEXT: br label [[IFCONT14:%.*]] -// CHECK3: else13: -// CHECK3-NEXT: br label [[IFCONT14]] -// CHECK3: ifcont14: -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func19 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// 
CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func20 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], 
%struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func21 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK3-NEXT: ret void -// -// -// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func22 -// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK3-NEXT: entry: -// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// 
CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK3-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK3-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker // CHECK4-SAME: () #[[ATTR0:[0-9]+]] { // CHECK4-NEXT: entry: @@ -4434,8 +83,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTAWAIT_WORK]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 // CHECK4-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { // CHECK4-NEXT: entry: @@ -4483,8 +130,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__ // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -4528,8 +173,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* @"_openmp_static_kernel$is_shared", align 2 // CHECK4-NEXT: call void @__kmpc_restore_team_static_memory(i16 0, i16 [[TMP17]]) // 
CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4604,8 +247,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT6]] // CHECK4: ifcont6: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4665,8 +306,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[PRECOND]] // CHECK4: exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4689,8 +328,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 // CHECK4-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4713,8 +350,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK4-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4737,8 +372,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 // CHECK4-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* 
[[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4761,8 +394,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK4-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker // CHECK4-SAME: () #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -4796,8 +427,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTAWAIT_WORK]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 // CHECK4-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -4859,8 +488,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__3 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -4918,8 +545,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK4: .omp.reduction.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func5 // CHECK4-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5015,8 +640,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT8]] // CHECK4: ifcont8: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func6 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5091,8 +714,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT14]] // CHECK4: ifcont14: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define 
{{[^@]+}}@_omp_reduction_list_to_global_copy_func7 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5121,8 +742,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 // CHECK4-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func8 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5149,8 +768,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK4-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func9 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5179,8 +796,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 // CHECK4-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func10 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5207,8 +822,6 @@ int bar(int n){ // CHECK4-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 // CHECK4-NEXT: call void @"_omp$reduction$reduction_func4"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 // CHECK4-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -5234,8 +847,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTEXIT:%.*]] // CHECK4: .exit: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__11 // CHECK4-SAME: (i32* noalias 
[[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -5300,8 +911,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK4: .omp.reduction.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@__omp_outlined__12 // CHECK4-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { // CHECK4-NEXT: entry: @@ -5373,8 +982,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[DOTOMP_REDUCTION_DONE]] // CHECK4: .omp.reduction.done: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func14 // CHECK4-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5473,8 +1080,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT8]] // CHECK4: ifcont8: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func15 // CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5551,8 +1156,6 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT14]] // CHECK4: ifcont14: // CHECK4-NEXT: ret void -// -// // CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func17 // CHECK4-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { // CHECK4-NEXT: entry: @@ -5651,3082 +1254,7265 @@ int bar(int n){ // CHECK4-NEXT: br label [[IFCONT8]] // CHECK4: ifcont8: // CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func18 +// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 
+// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK4-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK4-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK4-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK4-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK4: then: +// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK4-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK4-NEXT: br label [[IFCONT:%.*]] +// CHECK4: else: +// CHECK4-NEXT: br label [[IFCONT]] +// CHECK4: ifcont: +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK4-NEXT: br i1 [[IS_ACTIVE_THREAD]], label 
[[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK4: then4: +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK4-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK4-NEXT: br label [[IFCONT6:%.*]] +// CHECK4: else5: +// CHECK4-NEXT: br label [[IFCONT6]] +// CHECK4: ifcont6: +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK4-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK4-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK4: then8: +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK4-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK4-NEXT: br label [[IFCONT10:%.*]] +// CHECK4: else9: +// CHECK4-NEXT: br label [[IFCONT10]] +// CHECK4: ifcont10: +// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// 
CHECK4-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK4: then12: +// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK4-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK4-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK4-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK4-NEXT: br label [[IFCONT14:%.*]] +// CHECK4: else13: +// CHECK4-NEXT: br label [[IFCONT14]] +// CHECK4: ifcont14: +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func19 +// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = 
bitcast i8* [[TMP9]] to i32* +// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK4-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func20 +// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK4-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func21 +// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = bitcast 
i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK4-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK4-NEXT: ret void +// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func22 +// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK4-NEXT: entry: +// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], 
align 4 +// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK4-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK4-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK4-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// 
CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: .execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 +// CHECK5-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK5-NEXT: [[E7:%.*]] = alloca double, align 8 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store double* 
[[E]], double** [[E_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK5-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: +// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK5-NEXT: store double [[TMP7]], double* [[E7]], align 8 +// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK5-NEXT: store double 0.000000e+00, double* [[E1]], align 8 +// CHECK5-NEXT: [[TMP3:%.*]] = load double, double* [[E1]], align 8 +// CHECK5-NEXT: 
[[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK5-NEXT: store double [[ADD]], double* [[E1]], align 8 +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast double* [[E1]] to i8* +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 1024, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK5-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK5: .omp.reduction.then: +// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK5-NEXT: [[TMP13:%.*]] = load double, double* [[E1]], align 8 +// CHECK5-NEXT: [[ADD2:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK5-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK5: .omp.reduction.done: +// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define 
{{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK5-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK5-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) +// CHECK5-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK5-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK5-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +// CHECK5-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK5-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 +// CHECK5-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] +// CHECK5-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK5-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] +// CHECK5-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] +// CHECK5-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK5-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK5-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label 
[[ELSE5:%.*]] +// CHECK5: then4: +// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 +// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 +// CHECK5-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* +// CHECK5-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* +// CHECK5-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 +// CHECK5-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 +// CHECK5-NEXT: br label [[IFCONT6:%.*]] +// CHECK5: else5: +// CHECK5-NEXT: br label [[IFCONT6]] +// CHECK5: ifcont6: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK5-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK5-NEXT: br label [[PRECOND:%.*]] +// CHECK5: 
precond: +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 +// CHECK5-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK5: body: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK5: then4: +// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK5-NEXT: 
[[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] +// CHECK5-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK5-NEXT: br label [[IFCONT6:%.*]] +// CHECK5: else5: +// CHECK5-NEXT: br label [[IFCONT6]] +// CHECK5: ifcont6: +// CHECK5-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK5-NEXT: br label [[PRECOND]] +// CHECK5: exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* 
[[TMP10]], align 8 +// CHECK5-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// 
CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 +// CHECK5-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** 
[[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker +// CHECK5-SAME: () #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK5: .await.work: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK5: .select.workers: +// 
CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK5: .execute.parallel: +// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK5: .terminate.parallel: +// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK5: .barrier.parallel: +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 +// CHECK5-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* +// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK5-NEXT: [[TMP0:%.*]] = 
icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK5: .worker: +// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .mastercheck: +// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK5: .master: +// CHECK5-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK5-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK5-NEXT: store i8 [[TMP7]], i8* [[C8]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load float, float* [[CONV1]], align 4 +// CHECK5-NEXT: [[D9:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: store float [[TMP8]], float* [[D9]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] +// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK5: .termination.notifier: +// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK5-NEXT: br label [[DOTEXIT]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK5-NEXT: 
[[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty.2* +// CHECK5-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 1 +// CHECK5-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 0 +// CHECK5-NEXT: store i8 0, i8* [[C1]], align 4 +// CHECK5-NEXT: store float 1.000000e+00, float* [[D2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[C1]], align 4 +// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK5-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK5-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load float, float* [[D2]], align 4 +// CHECK5-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 3.300000e+01 +// CHECK5-NEXT: store float [[MUL]], float* [[D2]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: store i8* [[C1]], i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[D2]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i8* [[TMP12]], i32 1024, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* 
@_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK5-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK5: .omp.reduction.then: +// CHECK5-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK5-NEXT: [[CONV4:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK5-NEXT: [[TMP16:%.*]] = load i8, i8* [[C1]], align 4 +// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP16]] to i32 +// CHECK5-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK5-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[D2]], align 4 +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK5: .omp.reduction.done: +// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP2]]) +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: 
[[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK5-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) +// CHECK5-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK5-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK5-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP21:%.*]] 
= load i8*, i8** [[TMP20]], align 4 +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* +// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* +// CHECK5-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK5-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK5-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 +// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK5-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK5-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 +// CHECK5-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] +// CHECK5-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK5-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 +// CHECK5-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] +// CHECK5-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] +// CHECK5-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] +// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] +// CHECK5-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label 
[[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] +// CHECK5-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK5: then6: +// CHECK5-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 +// CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK5-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 +// CHECK5-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 +// CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 +// CHECK5-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 +// CHECK5-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* +// CHECK5-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* +// CHECK5-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK5-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 +// CHECK5-NEXT: br label [[IFCONT8:%.*]] +// CHECK5: else7: +// CHECK5-NEXT: br label [[IFCONT8]] +// CHECK5: ifcont8: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define 
{{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* +// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 +// CHECK5-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 
[[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK5: then4: +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 +// CHECK5-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +// CHECK5-NEXT: br label [[IFCONT6:%.*]] +// CHECK5: else5: +// CHECK5-NEXT: br label [[IFCONT6]] +// CHECK5: ifcont6: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK5: then8: +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK5-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 +// CHECK5-NEXT: br label [[IFCONT10:%.*]] +// CHECK5: else9: +// CHECK5-NEXT: br label [[IFCONT10]] +// CHECK5: ifcont10: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK5-NEXT: 
[[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK5: then12: +// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 +// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* +// CHECK5-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 +// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK5-NEXT: br label [[IFCONT14:%.*]] +// CHECK5: else13: +// CHECK5-NEXT: br label [[IFCONT14]] +// CHECK5: ifcont14: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** 
[[TMP8]], align 4 +// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK5-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK5-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* +// 
CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 +// CHECK5-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 +// CHECK5-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = 
bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 +// CHECK5-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK5-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i32* 
[[B_ADDR]] to i16* +// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) +// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() +// CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK5: .execute: +// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK5-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK5: .omp.deinit: +// CHECK5-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK5-NEXT: br label [[DOTEXIT:%.*]] +// CHECK5: .exit: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** 
[[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK5-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK5-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK5-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK5-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* 
@_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK5-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK5: .omp.reduction.then: +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK5-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK5-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK5-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK5-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 +// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK5: .omp.reduction.done: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 
dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK5-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK5-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK5-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK5: cond.true: +// CHECK5-NEXT: br label [[COND_END:%.*]] +// CHECK5: cond.false: +// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK5-NEXT: br label [[COND_END]] +// CHECK5: cond.end: +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// 
CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK5-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK5: .omp.reduction.then: +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK5-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK5-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK5-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK5-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK5: cond.true9: +// CHECK5-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK5-NEXT: br label [[COND_END11:%.*]] +// CHECK5: 
cond.false10: +// CHECK5-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK5-NEXT: br label [[COND_END11]] +// CHECK5: cond.end11: +// CHECK5-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK5-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK5: .omp.reduction.done: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 
x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK5-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK5-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK5-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK5-NEXT: store i16 [[TMP31]], i16* 
[[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK5-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK5-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK5-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK5-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK5-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK5-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK5-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK5-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK5: then6: +// CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP53:%.*]] = load i8*, i8** 
[[TMP52]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK5-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK5-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK5-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK5-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK5-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK5-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK5-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK5-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK5-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK5-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK5-NEXT: br label [[IFCONT8:%.*]] +// CHECK5: else7: +// CHECK5-NEXT: br label [[IFCONT8]] +// CHECK5: ifcont8: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK5: then4: +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// 
CHECK5-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK5-NEXT: br label [[IFCONT6:%.*]] +// CHECK5: else5: +// CHECK5-NEXT: br label [[IFCONT6]] +// CHECK5: ifcont6: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK5: then8: +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK5-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK5-NEXT: br label [[IFCONT10:%.*]] +// CHECK5: else9: +// CHECK5-NEXT: br label [[IFCONT10]] +// CHECK5: ifcont10: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK5: then12: +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK5-NEXT: 
[[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK5-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK5-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK5-NEXT: br label [[IFCONT14:%.*]] +// CHECK5: else13: +// CHECK5-NEXT: br label [[IFCONT14]] +// CHECK5: ifcont14: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// 
CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK5-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK5-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK5-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK5-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK5-NEXT: [[TMP33:%.*]] 
= getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK5-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK5-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK5-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK5-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK5-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK5-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK5-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK5-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK5-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK5-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK5-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK5-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK5-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK5-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK5: then6: +// CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK5-NEXT: 
[[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK5-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK5-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK5-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK5-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK5-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK5-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK5-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK5-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK5-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK5-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK5-NEXT: br label [[IFCONT8:%.*]] +// CHECK5: else7: +// CHECK5-NEXT: br label [[IFCONT8]] +// CHECK5: ifcont8: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** 
[[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK5: then: +// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK5-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK5-NEXT: br label [[IFCONT:%.*]] +// CHECK5: else: +// CHECK5-NEXT: br label [[IFCONT]] +// CHECK5: ifcont: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK5: then4: +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK5-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK5-NEXT: br label [[IFCONT6:%.*]] +// CHECK5: else5: +// CHECK5-NEXT: br label [[IFCONT6]] +// 
CHECK5: ifcont6: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK5: then8: +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK5-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK5-NEXT: br label [[IFCONT10:%.*]] +// CHECK5: else9: +// CHECK5-NEXT: br label [[IFCONT10]] +// CHECK5: ifcont10: +// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK5: then12: +// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK5-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK5-NEXT: store i16 
[[TMP28]], i16* [[TMP27]], align 2 +// CHECK5-NEXT: br label [[IFCONT14:%.*]] +// CHECK5: else13: +// CHECK5-NEXT: br label [[IFCONT14]] +// CHECK5: ifcont14: +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK5-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: 
[[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK5-NEXT: store i32 
[[TMP12]], i32* [[TMP10]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK5-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK5-NEXT: ret void +// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 +// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK5-NEXT: entry: +// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK5-NEXT: store i8* 
[[TMP8]], i8** [[TMP6]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK5-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker +// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK6: .await.work: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK6: .select.workers: +// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK6-NEXT: br 
i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK6: .execute.parallel: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK6: .terminate.parallel: +// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK6: .barrier.parallel: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 +// CHECK6-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK6-NEXT: [[E7:%.*]] = alloca double, align 8 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK6-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK6-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK6: .worker: +// CHECK6-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .mastercheck: +// CHECK6-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK6-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK6-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK6-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK6-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK6: .master: +// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK6-NEXT: store double [[TMP7]], double* [[E7]], align 8 +// CHECK6-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK6: .termination.notifier: +// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTEXIT]] +// CHECK6: .exit: +// CHECK6-NEXT: ret 
void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK6-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* +// CHECK6-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 +// CHECK6-NEXT: store double 0.000000e+00, double* [[E1]], align 8 +// CHECK6-NEXT: [[TMP3:%.*]] = load double, double* [[E1]], align 8 +// CHECK6-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 +// CHECK6-NEXT: store double [[ADD]], double* [[E1]], align 8 +// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = bitcast double* [[E1]] to i8* +// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// 
CHECK6-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 2048, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK6-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 +// CHECK6-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK6: .omp.reduction.then: +// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK6-NEXT: [[TMP13:%.*]] = load double, double* [[E1]], align 8 +// CHECK6-NEXT: [[ADD2:%.*]] = fadd double [[TMP12]], [[TMP13]] +// CHECK6-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK6: .omp.reduction.done: +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], 
align 2 +// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK6-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK6-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK6-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) +// CHECK6-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 +// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK6-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK6-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK6-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP25:%.*]] = icmp ult i16 
[[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +// CHECK6-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK6-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 +// CHECK6-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 +// CHECK6-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] +// CHECK6-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK6-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] +// CHECK6-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] +// CHECK6-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK6-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK6-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK6: then4: +// CHECK6-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 +// CHECK6-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 +// CHECK6-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* +// CHECK6-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* +// CHECK6-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 +// CHECK6-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 +// CHECK6-NEXT: br label [[IFCONT6:%.*]] +// CHECK6: 
else5: +// CHECK6-NEXT: br label [[IFCONT6]] +// CHECK6: ifcont6: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK6-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK6-NEXT: br label [[PRECOND:%.*]] +// CHECK6: precond: +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 +// CHECK6-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK6: body: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// 
CHECK6-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK6-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK6: then4: +// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] +// CHECK6-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK6-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK6-NEXT: br label [[IFCONT6:%.*]] +// CHECK6: else5: +// CHECK6-NEXT: br label [[IFCONT6]] +// CHECK6: ifcont6: +// CHECK6-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK6-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK6-NEXT: br label [[PRECOND]] +// CHECK6: exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: 
+// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK6-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], 
align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 
x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 +// CHECK6-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [1 x 
i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker +// CHECK6-SAME: () #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK6: .await.work: +// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK6: .select.workers: +// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK6: .execute.parallel: +// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK6: .terminate.parallel: +// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK6: .barrier.parallel: +// CHECK6-NEXT: 
call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 +// CHECK6-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* +// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK6: .worker: +// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .mastercheck: +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// 
CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK6: .master: +// CHECK6-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() +// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK6-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK6-NEXT: store i8 [[TMP7]], i8* [[C8]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load float, float* [[CONV1]], align 4 +// CHECK6-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: store float [[TMP8]], float* [[D9]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) +// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK6: .termination.notifier: +// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK6-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK6-NEXT: br label [[DOTEXIT]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) +// CHECK6-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty.2* +// CHECK6-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 1 +// CHECK6-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 0 +// CHECK6-NEXT: store i8 0, i8* [[C1]], align 4 +// CHECK6-NEXT: store float 1.000000e+00, float* [[D2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[C1]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 +// CHECK6-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK6-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// 
CHECK6-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load float, float* [[D2]], align 4 +// CHECK6-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 3.300000e+01 +// CHECK6-NEXT: store float [[MUL]], float* [[D2]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: store i8* [[C1]], i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[D2]] to i8* +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i8* [[TMP12]], i32 2048, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK6-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 +// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK6: .omp.reduction.then: +// CHECK6-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK6-NEXT: [[CONV4:%.*]] = sext i8 [[TMP15]] to i32 +// CHECK6-NEXT: [[TMP16:%.*]] = load i8, i8* [[C1]], align 4 +// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP16]] to i32 +// 
CHECK6-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK6-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK6-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[D2]], align 4 +// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP17]], [[TMP18]] +// CHECK6-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK6: .omp.reduction.done: +// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP2]]) +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: 
[[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK6-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK6-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) +// CHECK6-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK6-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK6-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 4 +// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* +// CHECK6-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK6-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* +// CHECK6-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK6-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 
[[NVPTX_WARP_SIZE5]] to i16 +// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK6-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 +// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 +// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK6-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK6-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 +// CHECK6-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] +// CHECK6-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK6-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 +// CHECK6-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 +// CHECK6-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] +// CHECK6-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] +// CHECK6-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] +// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] +// CHECK6-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] +// CHECK6-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK6: then6: +// CHECK6-NEXT: [[TMP51:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 +// CHECK6-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK6-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 +// CHECK6-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 +// CHECK6-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 +// CHECK6-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 +// CHECK6-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* +// CHECK6-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* +// CHECK6-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK6-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 +// CHECK6-NEXT: br label [[IFCONT8:%.*]] +// CHECK6: else7: +// CHECK6-NEXT: br label [[IFCONT8]] +// CHECK6: ifcont8: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK6-NEXT: [[NVPTX_TID3:%.*]] 
= call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* +// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 +// CHECK6-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK6: then4: +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load volatile 
i8, i8 addrspace(3)* [[TMP12]], align 1 +// CHECK6-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +// CHECK6-NEXT: br label [[IFCONT6:%.*]] +// CHECK6: else5: +// CHECK6-NEXT: br label [[IFCONT6]] +// CHECK6: ifcont6: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK6: then8: +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK6-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 +// CHECK6-NEXT: br label [[IFCONT10:%.*]] +// CHECK6: else9: +// CHECK6-NEXT: br label [[IFCONT10]] +// CHECK6: ifcont10: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK6: then12: +// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 +// CHECK6-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* +// CHECK6-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 +// 
CHECK6-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK6-NEXT: br label [[IFCONT14:%.*]] +// CHECK6: else13: +// CHECK6-NEXT: br label [[IFCONT14]] +// CHECK6: ifcont14: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK6-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], 
%struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK6-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 
[[TMP5]] +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 +// CHECK6-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: 
[[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 +// CHECK6-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 +// CHECK6-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK6-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK6-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* +// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) +// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() +// CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK6: .execute: +// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK6-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK6: .omp.deinit: +// CHECK6-NEXT: call 
void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK6-NEXT: br label [[DOTEXIT:%.*]] +// CHECK6: .exit: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK6-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK6-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK6-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK6-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 
+// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK6-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 2048, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK6-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK6-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK6: .omp.reduction.then: +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK6-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK6-NEXT: 
store i32 [[OR]], i32* [[TMP0]], align 4 +// CHECK6-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK6-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK6: cond.true: +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK6-NEXT: br label [[COND_END:%.*]] +// CHECK6: cond.false: +// CHECK6-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: br label [[COND_END]] +// CHECK6: cond.end: +// CHECK6-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK6-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 +// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK6: .omp.reduction.done: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK6-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 
+// CHECK6-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK6-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK6-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK6-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK6: cond.true: +// CHECK6-NEXT: br label [[COND_END:%.*]] +// CHECK6: cond.false: +// CHECK6-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK6-NEXT: br label [[COND_END]] +// CHECK6: cond.end: +// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK6-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK6-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP12:%.*]] = call i32 
@__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK6-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK6: .omp.reduction.then: +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK6-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK6-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK6-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK6-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK6-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK6: cond.true9: +// CHECK6-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK6-NEXT: br label [[COND_END11:%.*]] +// CHECK6: cond.false10: +// CHECK6-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK6-NEXT: br label [[COND_END11]] +// CHECK6: cond.end11: +// CHECK6-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK6-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK6: .omp.reduction.done: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, 
align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK6-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// 
CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK6-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK6-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK6-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK6-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK6-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK6-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK6-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK6-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK6-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK6-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK6-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// 
CHECK6-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK6-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK6-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK6-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK6-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK6-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK6-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK6: then6: +// CHECK6-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK6-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK6-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK6-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK6-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK6-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK6-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK6-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: 
[[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK6-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK6-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK6-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK6-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK6-NEXT: br label [[IFCONT8:%.*]] +// CHECK6: else7: +// CHECK6-NEXT: br label [[IFCONT8]] +// CHECK6: ifcont8: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr 
inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK6-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK6: then4: +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK6-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK6-NEXT: br label [[IFCONT6:%.*]] +// CHECK6: else5: +// CHECK6-NEXT: br label [[IFCONT6]] +// CHECK6: ifcont6: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK6: then8: +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* 
@__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK6-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK6-NEXT: br label [[IFCONT10:%.*]] +// CHECK6: else9: +// CHECK6-NEXT: br label [[IFCONT10]] +// CHECK6: ifcont10: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK6: then12: +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK6-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK6-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK6-NEXT: br label [[IFCONT14:%.*]] +// CHECK6: else13: +// CHECK6-NEXT: br label [[IFCONT14]] +// CHECK6: ifcont14: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: 
[[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK6-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32* 
[[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK6-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK6-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK6-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK6-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK6-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK6-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK6-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK6-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK6-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK6-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK6-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK6-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK6-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK6-NEXT: 
[[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK6-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK6-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK6-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK6-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK6-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK6-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK6: then6: +// CHECK6-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK6-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK6-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK6-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK6-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK6-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK6-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK6-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK6-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK6-NEXT: 
[[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK6-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK6-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK6-NEXT: br label [[IFCONT8:%.*]] +// CHECK6: else7: +// CHECK6-NEXT: br label [[IFCONT8]] +// CHECK6: ifcont8: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK6: then: +// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK6-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK6-NEXT: br label [[IFCONT:%.*]] +// CHECK6: else: +// CHECK6-NEXT: br label [[IFCONT]] +// CHECK6: ifcont: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK6: then4: +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK6-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK6-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK6-NEXT: br label [[IFCONT6:%.*]] +// CHECK6: else5: +// CHECK6-NEXT: br label [[IFCONT6]] +// CHECK6: ifcont6: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK6: then8: +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 
addrspace(3)* +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK6-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK6-NEXT: br label [[IFCONT10:%.*]] +// CHECK6: else9: +// CHECK6-NEXT: br label [[IFCONT10]] +// CHECK6: ifcont10: +// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK6: then12: +// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK6-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK6-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK6-NEXT: br label [[IFCONT14:%.*]] +// CHECK6: else13: +// CHECK6-NEXT: br label [[IFCONT14]] +// CHECK6: ifcont14: +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: 
[[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK6-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK6-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: 
[[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, 
align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK6-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK6-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK6-NEXT: ret void +// CHECK6-LABEL: define 
{{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 +// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK6-NEXT: entry: +// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 +// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 +// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// 
CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK6-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker +// CHECK1-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label 
[[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func18 -// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK4-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK4-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK4-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK4-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK4: then: -// CHECK4-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK4-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK4-NEXT: br label [[IFCONT:%.*]] -// CHECK4: else: -// CHECK4-NEXT: br label [[IFCONT]] -// CHECK4: ifcont: -// 
CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK4-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK4: then4: -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK4-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK4-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK4-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK4-NEXT: br label [[IFCONT6:%.*]] -// CHECK4: else5: -// CHECK4-NEXT: br label [[IFCONT6]] -// CHECK4: ifcont6: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK4-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK4-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK4: then8: -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK4-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK4-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK4-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK4-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK4-NEXT: br label [[IFCONT10:%.*]] -// CHECK4: else9: -// CHECK4-NEXT: br label 
[[IFCONT10]] -// CHECK4: ifcont10: -// CHECK4-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK4-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK4: then12: -// CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK4-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK4-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK4-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK4-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK4-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK4-NEXT: br label [[IFCONT14:%.*]] -// CHECK4: else13: -// CHECK4-NEXT: br label [[IFCONT14]] -// CHECK4: ifcont14: -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20 +// CHECK1-SAME: (i64 [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[E]], i64* [[E_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[E_ADDR]] to double* +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: 
[[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker() #[[ATTR3:[0-9]+]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load double, double* [[CONV]], align 8 +// CHECK1-NEXT: [[E7:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E7]] to double* +// CHECK1-NEXT: store double [[TMP5]], double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__(i32* 
[[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E7]]) +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[E_ADDR:%.*]] = alloca double*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store double* [[E]], double** [[E_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 8 +// CHECK1-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i64 8) +// CHECK1-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK1-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK1-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: 
[[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 1024, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK1-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK1-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK1-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK1-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK1: .omp.reduction.done: +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = 
alloca i16, align 2 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK1-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK1-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) +// CHECK1-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i64 1 +// 
CHECK1-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +// CHECK1-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 +// CHECK1-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] +// CHECK1-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK1-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] +// CHECK1-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] +// CHECK1-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK1-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1: then4: +// CHECK1-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 8 +// CHECK1-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 8 +// CHECK1-NEXT: 
[[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* +// CHECK1-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* +// CHECK1-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 +// CHECK1-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 +// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1: else5: +// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1: ifcont6: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK1-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: br label [[PRECOND:%.*]] +// CHECK1: precond: +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 +// CHECK1-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK1: body: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: 
br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1: then4: +// CHECK1-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] +// CHECK1-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1: else5: +// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1: ifcont6: +// CHECK1-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK1-NEXT: store 
i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK1-NEXT: br label [[PRECOND]] +// CHECK1: exit: +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func19 -// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], 
%struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK4-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK1-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func20 -// CHECK4-SAME: (i8* 
[[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK4-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP12]], i8* 
[[TMP13]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func21 -// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// 
CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK4-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK4-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK4-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK4-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* 
[[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 +// CHECK1-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 +// CHECK1-NEXT: ret void // // -// CHECK4-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func22 -// CHECK4-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK4-NEXT: entry: -// CHECK4-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK4-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK4-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK4-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// 
CHECK4-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK4-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK4-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK4-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK4-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK4-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK4-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK4-NEXT: call void @"_omp$reduction$reduction_func16"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK4-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], 
align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker +// CHECK1-SAME: () #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: store i8* null, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK1: .await.work: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK1-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK1-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: 
[[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 8 +// CHECK1-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK1-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK1: .select.workers: +// CHECK1-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK1-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK1-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK1: .execute.parallel: +// CHECK1-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK1-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK1-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK1: .terminate.parallel: +// CHECK1-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK1-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK1: .barrier.parallel: +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: 
[[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .terminate.parallel: -// CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26 +// CHECK1-SAME: (i64 [[C:%.*]], i64 [[D:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[C]], i64* [[C_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[D]], i64* [[D_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[C_ADDR]] to i8* +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[D_ADDR]] to float* +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: 
[[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK1-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK1-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK1: .worker: +// CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker() #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .mastercheck: +// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK1-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK1-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK1-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK1-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK1-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK1: .master: +// CHECK1-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK1-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK1-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[C8:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: store i8 [[TMP5]], i8* [[C8]], align 1 +// CHECK1-NEXT: [[TMP6:%.*]] = load float, float* [[CONV1]], align 8 +// CHECK1-NEXT: [[D9:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D9]] to float* +// CHECK1-NEXT: store float [[TMP6]], 
float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D_ON_STACK]]) #[[ATTR3]] +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D9]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C8]]) +// CHECK1-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK1: .termination.notifier: +// CHECK1-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK1-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK1-NEXT: br label [[DOTEXIT]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 -// CHECK5-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK5-NEXT: [[E7:%.*]] = alloca double, align 8 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: .worker: -// CHECK5-NEXT: call void 
@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK5-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK5-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK5-NEXT: store double [[TMP7]], double* [[E7]], align 8 -// CHECK5-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret 
void +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[D_ADDR:%.*]] = alloca float*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 8 +// CHECK1-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 8 +// CHECK1-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i64 1) +// CHECK1-NEXT: [[D2:%.*]] = call i8* @__kmpc_alloc_shared(i64 4) +// CHECK1-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* +// CHECK1-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK1-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK1-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK1-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK1-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK1-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK1-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[TMP4]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: store i8* [[C1]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 1024, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK1-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +// CHECK1-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK1-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK1-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK1-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK1-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK1-NEXT: [[MUL8:%.*]] = fmul float 
[[TMP15]], [[TMP16]] +// CHECK1-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK1: .omp.reduction.done: +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) +// CHECK1-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK5-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK5-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK5-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK5-NEXT: [[TMP3:%.*]] = load double, double* [[E1]], align 8 -// CHECK5-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 -// CHECK5-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds 
[1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast double* [[E1]] to i8* -// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], i32 1024, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK5-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK5-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK5: .omp.reduction.then: -// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load double, double* [[E1]], align 8 -// CHECK5-NEXT: [[ADD2:%.*]] = fadd double [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) -// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK5: .omp.reduction.done: -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: 
[[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK1-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) +// CHECK1-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK1-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// 
CHECK1-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* +// CHECK1-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i64 1 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 +// CHECK1-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i64 1 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK1-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 8 +// CHECK1-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] +// CHECK1-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 +// CHECK1-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] +// CHECK1-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP43:%.*]] = and i1 
[[TMP41]], [[TMP42]] +// CHECK1-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] +// CHECK1-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] +// CHECK1-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] +// CHECK1-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK1: then6: +// CHECK1-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 8 +// CHECK1-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 8 +// CHECK1-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 +// CHECK1-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 +// CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 8 +// CHECK1-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 8 +// CHECK1-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* +// CHECK1-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* +// CHECK1-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK1-NEXT: store float [[TMP62]], 
float* [[TMP61]], align 4 +// CHECK1-NEXT: br label [[IFCONT8:%.*]] +// CHECK1: else7: +// CHECK1-NEXT: br label [[IFCONT8]] +// CHECK1: ifcont8: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* -// CHECK5-NEXT: [[TMP16:%.*]] = 
bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK5-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK5-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 -// CHECK5-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK5-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK5-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK5-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK5-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK5-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK5-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK5-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK5-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK5-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: 
[[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK5-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK5: then4: -// CHECK5-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 -// CHECK5-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 -// CHECK5-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* -// CHECK5-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* -// CHECK5-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 -// CHECK5-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 -// CHECK5-NEXT: br label [[IFCONT6:%.*]] -// CHECK5: else5: -// CHECK5-NEXT: br label [[IFCONT6]] -// CHECK5: ifcont6: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// 
CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* +// CHECK1-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 +// CHECK1-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1: then4: +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 +// CHECK1-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1: else5: +// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1: ifcont6: +// CHECK1-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK1: then8: +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 +// CHECK1-NEXT: br label [[IFCONT10:%.*]] +// CHECK1: else9: +// CHECK1-NEXT: br label [[IFCONT10]] +// CHECK1: ifcont10: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK1: then12: +// CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* +// CHECK1-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK1-NEXT: br label [[IFCONT14:%.*]] +// CHECK1: else13: +// CHECK1-NEXT: br label [[IFCONT14]] +// CHECK1: ifcont14: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define 
{{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK5-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 -// CHECK5-NEXT: br label [[PRECOND:%.*]] -// CHECK5: precond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK5-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK5: body: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr 
inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK5: then4: -// CHECK5-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK5-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 -// CHECK5-NEXT: br label [[IFCONT6:%.*]] -// CHECK5: else5: -// CHECK5-NEXT: br label [[IFCONT6]] -// CHECK5: ifcont6: -// CHECK5-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK5-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 -// CHECK5-NEXT: br label [[PRECOND]] -// CHECK5: exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// 
CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK1-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 +// CHECK1-NEXT: ret void +// // +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: 
entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK5-SAME: (i8* 
[[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 -// CHECK5-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 -// CHECK5-NEXT: ret void // +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** 
[[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 +// CHECK1-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 +// CHECK1-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 +// CHECK1-NEXT: ret void // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = 
alloca [1 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] -// CHECK5-NEXT: ret void // +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to 
%struct._globalized_locals_ty.0* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to 
[1 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 -// CHECK5-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 -// CHECK5-NEXT: ret void +// +// CHECK1-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33 +// CHECK1-SAME: (i64 [[A:%.*]], i64 [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i64, align 8 +// CHECK1-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK1-NEXT: store i64 [[A]], i64* [[A_ADDR]], align 8 +// CHECK1-NEXT: store i64 [[B]], i64* [[B_ADDR]], align 8 +// CHECK1-NEXT: [[CONV:%.*]] = bitcast i64* [[A_ADDR]] to i32* +// CHECK1-NEXT: [[CONV1:%.*]] = bitcast i64* [[B_ADDR]] to i16* +// CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) +// CHECK1-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK1: .execute: +// CHECK1-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK1-NEXT: store 
i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK1-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[CONV]], i16* [[CONV1]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK1: .omp.deinit: +// CHECK1-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK1-NEXT: br label [[DOTEXIT:%.*]] +// CHECK1: .exit: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK1-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK1-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 8 +// CHECK1-NEXT: 
[[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK1-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK1-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i64 2) +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK1-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK1-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK1-NEXT: br i1 [[TMP16]], label 
[[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK1-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK1-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK1-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK1: .omp.reduction.done: +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK1-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 8 +// CHECK1-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 8 +// CHECK1-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[B2:%.*]] = alloca i16, align 
2 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 +// CHECK1-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 8 +// CHECK1-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 8 +// CHECK1-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 8 +// CHECK1-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK1-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK1-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK1-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK1-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK1-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK1-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK1-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK1: cond.true: +// CHECK1-NEXT: br label [[COND_END:%.*]] +// CHECK1: cond.false: +// CHECK1-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK1-NEXT: br label [[COND_END]] +// CHECK1: cond.end: +// CHECK1-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i64 16, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK1-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK1-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK1: .omp.reduction.then: +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK1-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK1-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK1-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK1-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK1: cond.true9: +// CHECK1-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK1-NEXT: br label [[COND_END11:%.*]] +// CHECK1: cond.false10: +// CHECK1-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK1-NEXT: br label [[COND_END11]] +// CHECK1: cond.end11: +// CHECK1-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK1-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK1-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK1-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK1: .omp.reduction.done: 
+// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca 
i16, align 2 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* 
[[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK1-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK1-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] 
+// CHECK1-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK1-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK1: then6: +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK1-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 +// CHECK1-NEXT: 
[[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK1-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK1-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK1-NEXT: br label [[IFCONT8:%.*]] +// CHECK1: else7: +// CHECK1-NEXT: br label [[IFCONT8]] +// CHECK1: ifcont8: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker -// CHECK5-SAME: () #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK5: .await.work: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK5-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK5-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK5-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK5: .select.workers: -// CHECK5-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK5-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK5-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK5: .execute.parallel: -// CHECK5-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK5-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK5-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK5: .terminate.parallel: -// 
CHECK5-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK5-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK5: .barrier.parallel: -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1: then4: +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1: else5: +// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1: ifcont6: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK1: then8: +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 
addrspace(3)* +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK1-NEXT: br label [[IFCONT10:%.*]] +// CHECK1: else9: +// CHECK1-NEXT: br label [[IFCONT10]] +// CHECK1: ifcont10: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK1: then12: +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK1-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK1-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK1-NEXT: br label [[IFCONT14:%.*]] +// CHECK1: else13: +// CHECK1-NEXT: br label [[IFCONT14]] +// CHECK1: ifcont14: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK5-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK5-NEXT: store i32 
[[D]], i32* [[D_ADDR]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK5-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK5-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK5: .worker: -// CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .mastercheck: -// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK5-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK5-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK5-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK5-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] -// CHECK5-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK5: .master: -// CHECK5-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] -// CHECK5-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK5-NEXT: 
[[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 -// CHECK5-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 -// CHECK5-NEXT: store i8 [[TMP7]], i8* [[C8]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK5-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: store float [[TMP8]], float* [[D9]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: store i32 [[TMP9]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK5-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK5: .termination.notifier: -// CHECK5-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK5-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK5-NEXT: br label [[DOTEXIT]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, 
align 4 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK1-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK1-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i64 1 +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i64 1 +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK1-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 
1 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK1-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK1-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK1-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK1-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK1-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK1-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i64 1 +// CHECK1-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i64 1 +// CHECK1-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK1-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 8 +// CHECK1-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK1-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK1-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK1-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK1-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK1-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK1-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK1-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK1-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK1-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK1-NEXT: br i1 [[TMP46]], label 
[[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK1-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK1-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK1-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK1-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK1: then6: +// CHECK1-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 8 +// CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 8 +// CHECK1-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK1-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK1-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK1-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 8 +// CHECK1-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 8 +// CHECK1-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK1-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK1-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK1-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK1-NEXT: br label [[IFCONT8:%.*]] 
+// CHECK1: else7: +// CHECK1-NEXT: br label [[IFCONT8]] +// CHECK1: ifcont8: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i8* [[TMP2]] to %struct._globalized_locals_ty.2* -// CHECK5-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 1 -// CHECK5-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 0 -// CHECK5-NEXT: store i8 0, i8* [[C1]], align 4 -// CHECK5-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK5-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK5-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK5-NEXT: store i8 
[[CONV3]], i8* [[C1]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = load float, float* [[D2]], align 4 -// CHECK5-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 3.300000e+01 -// CHECK5-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: store i8* [[C1]], i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[D2]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i8* [[TMP12]], i32 1024, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK5-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 -// CHECK5-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK5: .omp.reduction.then: -// CHECK5-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK5-NEXT: [[CONV4:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK5-NEXT: [[TMP16:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK5-NEXT: [[CONV5:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK5-NEXT: 
[[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK5-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[D2]], align 4 -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK5: .omp.reduction.done: -// CHECK5-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP2]]) -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK1-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK1-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK1: then: +// CHECK1-NEXT: [[TMP5:%.*]] = getelementptr 
inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK1-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK1-NEXT: br label [[IFCONT:%.*]] +// CHECK1: else: +// CHECK1-NEXT: br label [[IFCONT]] +// CHECK1: ifcont: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK1: then4: +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK1-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK1-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK1-NEXT: br label [[IFCONT6:%.*]] +// CHECK1: else5: +// CHECK1-NEXT: br label [[IFCONT6]] +// CHECK1: ifcont6: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK1-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK1: then8: +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// 
CHECK1-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 8 +// CHECK1-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK1-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK1-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK1-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK1-NEXT: br label [[IFCONT10:%.*]] +// CHECK1: else9: +// CHECK1-NEXT: br label [[IFCONT10]] +// CHECK1: ifcont10: +// CHECK1-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK1-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK1: then12: +// CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK1-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 8 +// CHECK1-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK1-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK1-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK1-NEXT: br label [[IFCONT14:%.*]] +// CHECK1: else13: +// CHECK1-NEXT: br label [[IFCONT14]] +// CHECK1: ifcont14: +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) 
#[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 -// CHECK5-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK5-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) -// CHECK5-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK5-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// 
CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK5-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 4 -// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* -// CHECK5-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK5-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 -// CHECK5-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 -// CHECK5-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK5-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 -// CHECK5-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] -// CHECK5-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK5-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 
-// CHECK5-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] -// CHECK5-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] -// CHECK5-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] -// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] -// CHECK5-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] -// CHECK5-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK5: then6: -// CHECK5-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 -// CHECK5-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 -// CHECK5-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 -// CHECK5-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 -// CHECK5-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK5-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 -// CHECK5-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* -// CHECK5-NEXT: 
[[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK5-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK5-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 -// CHECK5-NEXT: br label [[IFCONT8:%.*]] -// CHECK5: else7: -// CHECK5-NEXT: br label [[IFCONT8]] -// CHECK5: ifcont8: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, 
i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], 
[32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* -// CHECK5-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 -// CHECK5-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK5: then4: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 -// CHECK5-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 -// CHECK5-NEXT: br label [[IFCONT6:%.*]] -// CHECK5: else5: -// CHECK5-NEXT: br label [[IFCONT6]] -// CHECK5: ifcont6: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK5: then8: -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* -// CHECK5-NEXT: 
[[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK5-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 -// CHECK5-NEXT: br label [[IFCONT10:%.*]] -// CHECK5: else9: -// CHECK5-NEXT: br label [[IFCONT10]] -// CHECK5: ifcont10: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK5: then12: -// CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK5-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 -// CHECK5-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK5-NEXT: br label [[IFCONT14:%.*]] -// CHECK5: else13: -// CHECK5-NEXT: br label [[IFCONT14]] -// CHECK5: ifcont14: -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store 
i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK1-NEXT: ret void +// +// +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 +// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: 
store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK1-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 0 +// CHECK1-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 8 +// CHECK1-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK1-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i64 0, i64 1 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK1-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK1-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// 
CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK5-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK5-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 -// CHECK5-NEXT: ret void +// CHECK1-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 
+// CHECK1-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK1-NEXT: entry: +// CHECK1-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK1-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 8 +// CHECK1-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 8 +// CHECK1-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 8 +// CHECK1-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 0 +// CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK1-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK1-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i64 0, i64 1 +// CHECK1-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK1-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK1-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK1-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 8 +// CHECK1-NEXT: call void @"_omp$reduction$reduction_func14"(i8* 
[[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK1-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** 
[[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker +// CHECK2-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// 
CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 -// CHECK5-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 
x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 -// CHECK5-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20 +// CHECK2-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[E7:%.*]] = alloca double, align 8 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker() #[[ATTR3:[0-9]+]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK2-NEXT: 
[[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK2-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK2-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK2-NEXT: store double [[TMP7]], double* [[E7]], align 8 +// CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 
-// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: store i32* 
[[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK2-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i32 8) +// CHECK2-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK2-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 +// CHECK2-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 1024, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK2-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK2-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] 
+// CHECK2: .omp.reduction.then: +// CHECK2-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK2-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK2-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK2: .omp.reduction.done: +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 
x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK2-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK2-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) +// CHECK2-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +// CHECK2-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 +// CHECK2-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] +// CHECK2-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK2-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] +// CHECK2-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] +// CHECK2-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// 
CHECK2-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK2-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2: then4: +// CHECK2-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 +// CHECK2-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 +// CHECK2-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* +// CHECK2-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* +// CHECK2-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 +// CHECK2-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 +// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2: else5: +// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2: ifcont6: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 -// CHECK5-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK5-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK5-NEXT: 
[[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK5-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK5-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK5-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() -// CHECK5-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK5: .execute: -// CHECK5-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK5-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK5: .omp.deinit: -// CHECK5-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) -// CHECK5-NEXT: br label [[DOTEXIT:%.*]] -// CHECK5: .exit: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK2-NEXT: store i32 0, i32* 
[[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: br label [[PRECOND:%.*]] +// CHECK2: precond: +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 +// CHECK2-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK2: body: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2: then4: +// CHECK2-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// 
CHECK2-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] +// CHECK2-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2: else5: +// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2: ifcont6: +// CHECK2-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK2-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK2-NEXT: br label [[PRECOND]] +// CHECK2: exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK5-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK5-NEXT: [[TMP2:%.*]] = getelementptr inbounds 
[2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK5-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK5-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK5-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK5-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* 
@_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK5-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK5-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK5: .omp.reduction.then: -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK5-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK5-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK5-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK5-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK5: .omp.reduction.done: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// 
CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK2-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK5-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK5-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** 
[[DOTBOUND_TID__ADDR]], align 4 -// CHECK5-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK5-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK5-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK5-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK5-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK5-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK5-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]] -// CHECK5: cond.false: -// CHECK5-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK5-NEXT: br label [[COND_END]] -// CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* 
[[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK5-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK5-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK5: .omp.reduction.then: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK5-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK5-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK5-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK5-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK5-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK5: cond.true9: -// CHECK5-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK5-NEXT: br label [[COND_END11:%.*]] -// CHECK5: cond.false10: -// CHECK5-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK5-NEXT: br label [[COND_END11]] -// CHECK5: cond.end11: -// CHECK5-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK5-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK5-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK5-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK5: .omp.reduction.done: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// 
CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// 
CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK5-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK5-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK5-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK5-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK5-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK5-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK5-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK5-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK5-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK5-NEXT: [[TMP46:%.*]] = or 
i1 [[TMP45]], [[TMP44]] -// CHECK5-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK5-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK5: then6: -// CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK5-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK5-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK5-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK5-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK5-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK5-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK5-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK5-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK5-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK5-NEXT: store i16 [[TMP65]], 
i16* [[TMP64]], align 2 -// CHECK5-NEXT: br label [[IFCONT8:%.*]] -// CHECK5: else7: -// CHECK5-NEXT: br label [[IFCONT8]] -// CHECK5: ifcont8: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 +// CHECK2-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca 
i32, align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK5-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// 
CHECK5: then4: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK5-NEXT: br label [[IFCONT6:%.*]] -// CHECK5: else5: -// CHECK5-NEXT: br label [[IFCONT6]] -// CHECK5: ifcont6: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK5: then8: -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK5-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK5-NEXT: br label [[IFCONT10:%.*]] -// CHECK5: else9: -// CHECK5-NEXT: br label [[IFCONT10]] -// CHECK5: ifcont10: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], 
label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK5: then12: -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK5-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK5-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK5-NEXT: br label [[IFCONT14:%.*]] -// CHECK5: else13: -// CHECK5-NEXT: br label [[IFCONT14]] -// CHECK5: ifcont14: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = 
getelementptr inbounds [1024 x double], [1024 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker +// CHECK2-SAME: () #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK2: .await.work: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK2-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK2-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK2-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK2-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK2: .select.workers: +// CHECK2-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK2-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK2-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK2: .execute.parallel: +// CHECK2-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK2-NEXT: call void [[TMP5]](i16 
0, i32 [[TMP4]]) +// CHECK2-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK2: .terminate.parallel: +// CHECK2-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK2-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK2: .barrier.parallel: +// CHECK2-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26 +// CHECK2-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* +// CHECK2-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK2-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK2-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK2: .worker: +// CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker() #[[ATTR3]] +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .mastercheck: +// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call 
i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK2-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK2-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK2-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK2-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK2-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK2: .master: +// CHECK2-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK2-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK2-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK2-NEXT: [[C8:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: store i8 [[TMP5]], i8* [[C8]], align 1 +// CHECK2-NEXT: [[TMP6:%.*]] = load float, float* [[CONV1]], align 4 +// CHECK2-NEXT: [[D9:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D9]] to float* +// CHECK2-NEXT: store float [[TMP6]], float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK2-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D_ON_STACK]]) #[[ATTR3]] +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D9]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C8]]) +// CHECK2-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK2: .termination.notifier: +// CHECK2-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK2-NEXT: call void 
@__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK2-NEXT: br label [[DOTEXIT]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK5-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// 
CHECK5-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK5-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK5-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK5-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK5-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK5-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK5-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK5-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK5-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK5-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK5-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK5-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK5-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK5-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK5-NEXT: store i8* [[TMP34]], i8** [[TMP23]], 
align 4 -// CHECK5-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK5-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK5-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK5-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK5-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK5-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK5-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK5-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK5-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK5-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK5-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK5-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK5-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK5-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK5-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK5: then6: -// CHECK5-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK5-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK5-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK5-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK5-NEXT: [[TMP58:%.*]] = 
load i32, i32* [[TMP56]], align 4 -// CHECK5-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK5-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK5-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK5-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK5-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK5-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK5-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK5-NEXT: br label [[IFCONT8:%.*]] -// CHECK5: else7: -// CHECK5-NEXT: br label [[IFCONT8]] -// CHECK5: ifcont8: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK2-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK2-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// CHECK2-NEXT: 
[[D2:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK2-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* +// CHECK2-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK2-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK2-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK2-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK2-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK2-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK2-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 1024, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* 
@_omp_reduction_global_to_list_reduce_func8) +// CHECK2-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +// CHECK2-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2: .omp.reduction.then: +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK2-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK2-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK2-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK2-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] +// CHECK2-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK2: .omp.reduction.done: +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) +// CHECK2-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// 
CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK2-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) +// CHECK2-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK2-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 4 +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: 
[[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* +// CHECK2-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 +// CHECK2-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK2-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 +// CHECK2-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] +// CHECK2-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 +// CHECK2-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] +// CHECK2-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] +// CHECK2-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] +// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] +// CHECK2-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to 
i8* +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] +// CHECK2-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK2: then6: +// CHECK2-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 +// CHECK2-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK2-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 +// CHECK2-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 +// CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 +// CHECK2-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 +// CHECK2-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* +// CHECK2-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* +// CHECK2-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK2-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 +// CHECK2-NEXT: br label [[IFCONT8:%.*]] +// CHECK2: else7: +// CHECK2-NEXT: br label [[IFCONT8]] +// CHECK2: ifcont8: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 
+// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* +// CHECK2-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 +// CHECK2-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK2-NEXT: br i1 
[[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2: then4: +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 +// CHECK2-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2: else5: +// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2: ifcont6: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK2: then8: +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 +// CHECK2-NEXT: br label [[IFCONT10:%.*]] +// CHECK2: else9: +// CHECK2-NEXT: br label [[IFCONT10]] +// CHECK2: ifcont10: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], 
label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK2: then12: +// CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* +// CHECK2-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK2-NEXT: br label [[IFCONT14:%.*]] +// CHECK2: else13: +// CHECK2-NEXT: br label [[IFCONT14]] +// CHECK2: ifcont14: +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK5-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK5-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label 
[[ELSE:%.*]] -// CHECK5: then: -// CHECK5-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK5-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK5-NEXT: br label [[IFCONT:%.*]] -// CHECK5: else: -// CHECK5-NEXT: br label [[IFCONT]] -// CHECK5: ifcont: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK5: then4: -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK5-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK5-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK5-NEXT: br label [[IFCONT6:%.*]] -// CHECK5: else5: -// CHECK5-NEXT: br label [[IFCONT6]] -// CHECK5: ifcont6: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK5-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK5: then8: -// CHECK5-NEXT: [[TMP16:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK5-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK5-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK5-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK5-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK5-NEXT: br label [[IFCONT10:%.*]] -// CHECK5: else9: -// CHECK5-NEXT: br label [[IFCONT10]] -// CHECK5: ifcont10: -// CHECK5-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK5-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK5: then12: -// CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK5-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK5-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK5-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK5-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK5-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK5-NEXT: br label [[IFCONT14:%.*]] -// CHECK5: else13: -// CHECK5-NEXT: br label [[IFCONT14]] -// CHECK5: ifcont14: -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 
[[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK2-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 +// CHECK2-NEXT: ret void // // 
-// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK5-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 
[[TMP7]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK5-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast 
[2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: 
[[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 +// CHECK2-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** 
[[TMP12]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 +// CHECK2-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK5-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK5-NEXT: store 
i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK5-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK5-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] 
= getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1024 x float], [1024 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] +// CHECK2-NEXT: ret void // // -// CHECK5-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 -// CHECK5-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK5-NEXT: entry: -// CHECK5-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK5-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK5-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK5-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK5-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] 
-// CHECK5-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK5-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK5-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK5-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK5-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK5-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK5-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK5-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33 +// CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK2-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK2-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* +// CHECK2-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) +// CHECK2-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK2: .execute: +// CHECK2-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// 
CHECK2-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK2: .omp.deinit: +// CHECK2-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK2-NEXT: br label [[DOTEXIT:%.*]] +// CHECK2: .exit: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker -// CHECK6-SAME: () #[[ATTR0:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK6: .await.work: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK6: .select.workers: -// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK6: .execute.parallel: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK6: .terminate.parallel: -// 
CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK6: .barrier.parallel: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK2-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK2-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK2-NEXT: 
[[TMP5:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK2-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK2-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK2-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 1024, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK2-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK2-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2: .omp.reduction.then: +// CHECK2-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK2-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK2-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK2-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK2: .omp.reduction.done: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23 -// CHECK6-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK6-NEXT: [[E7:%.*]] = alloca double, align 8 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// 
CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK6-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK6-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK6: .worker: -// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l23_worker() #[[ATTR3:[0-9]+]] -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .mastercheck: -// CHECK6-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 -// CHECK6-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 -// CHECK6-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 -// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] -// CHECK6-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] -// CHECK6-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK6: .master: -// CHECK6-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] -// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK6-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK6-NEXT: store double [[TMP7]], double* [[E7]], align 8 -// CHECK6-NEXT: store i32 
[[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK6: .termination.notifier: -// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTEXIT]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK2-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK2-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK2-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK2-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK2-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK2-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK2-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK2-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK2-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK2-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: [[CONV:%.*]] = 
sext i16 [[TMP3]] to i32 +// CHECK2-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK2-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK2: cond.true: +// CHECK2-NEXT: br label [[COND_END:%.*]] +// CHECK2: cond.false: +// CHECK2-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK2-NEXT: br label [[COND_END]] +// CHECK2: cond.end: +// CHECK2-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK2-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 +// CHECK2-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK2: .omp.reduction.then: +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK2-NEXT: 
[[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK2-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK2-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK2-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK2: cond.true9: +// CHECK2-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK2-NEXT: br label [[COND_END11:%.*]] +// CHECK2: cond.false10: +// CHECK2-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK2-NEXT: br label [[COND_END11]] +// CHECK2: cond.end11: +// CHECK2-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK2-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK2-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK2-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK2: .omp.reduction.done: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], 
i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast 
i8* [[TMP22]] to i16* +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK2-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK2-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK2-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK2-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK2-NEXT: call 
void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK2: then6: +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK2-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK2-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK2-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK2-NEXT: br label [[IFCONT8:%.*]] +// CHECK2: else7: +// CHECK2-NEXT: br label [[IFCONT8]] +// CHECK2: ifcont8: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__ -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias 
[[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK6-NEXT: [[TMP2:%.*]] = bitcast i8* [[TMP1]] to %struct._globalized_locals_ty* -// CHECK6-NEXT: [[E1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP2]], i32 0, i32 0 -// CHECK6-NEXT: store double 0.000000e+00, double* [[E1]], align 8 -// CHECK6-NEXT: [[TMP3:%.*]] = load double, double* [[E1]], align 8 -// CHECK6-NEXT: [[ADD:%.*]] = fadd double [[TMP3]], 5.000000e+00 -// CHECK6-NEXT: store double [[ADD]], double* [[E1]], align 8 -// CHECK6-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = bitcast double* [[E1]] to i8* -// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP9]], 
i32 2048, i8* [[TMP8]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) -// CHECK6-NEXT: [[TMP11:%.*]] = icmp eq i32 [[TMP10]], 1 -// CHECK6-NEXT: br i1 [[TMP11]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK6: .omp.reduction.then: -// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP0]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load double, double* [[E1]], align 8 -// CHECK6-NEXT: [[ADD2:%.*]] = fadd double [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 -// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) -// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK6: .omp.reduction.done: -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP1]]) -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 
[[NVPTX_TID3]], 5 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2: then4: +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK2-NEXT: br label 
[[IFCONT6:%.*]] +// CHECK2: else5: +// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2: ifcont6: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK2: then8: +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK2-NEXT: br label [[IFCONT10:%.*]] +// CHECK2: else9: +// CHECK2-NEXT: br label [[IFCONT10]] +// CHECK2: ifcont10: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK2: then12: +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK2-NEXT: [[TMP28:%.*]] = load 
volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK2-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK2-NEXT: br label [[IFCONT14:%.*]] +// CHECK2: else13: +// CHECK2-NEXT: br label [[IFCONT14]] +// CHECK2: ifcont14: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* -// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* -// 
CHECK6-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* -// CHECK6-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK6-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) -// CHECK6-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 -// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 -// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 -// CHECK6-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK6-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK6-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] -// CHECK6-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK6-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 -// CHECK6-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 -// CHECK6-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] -// CHECK6-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK6-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] -// CHECK6-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] -// CHECK6-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] -// CHECK6-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* -// CHECK6-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label 
[[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] -// CHECK6-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK6: then4: -// CHECK6-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 -// CHECK6-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 -// CHECK6-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* -// CHECK6-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* -// CHECK6-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 -// CHECK6-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 -// CHECK6-NEXT: br label [[IFCONT6:%.*]] -// CHECK6: else5: -// CHECK6-NEXT: br label [[IFCONT6]] -// CHECK6: ifcont6: -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: store i16 
[[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK2-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK2-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK2-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK2-NEXT: [[TMP25:%.*]] = 
getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK2-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK2-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK2-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK2-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK2-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK2-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK2-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK2-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK2-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK2-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK2-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK2-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK2-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK2-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK2-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK2-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK2-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK2-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK2-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK2-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK2-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* 
[[TMP48]]) #[[ATTR3]] +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK2-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK2-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK2-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK2: then6: +// CHECK2-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK2-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK2-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK2-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK2-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK2-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK2-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK2-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK2-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK2-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK2-NEXT: br label [[IFCONT8:%.*]] +// CHECK2: else7: +// CHECK2-NEXT: br label [[IFCONT8]] +// CHECK2: ifcont8: +// CHECK2-NEXT: ret void +// +// +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: 
[[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK2-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK2-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK2: then: +// CHECK2-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK2-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK2-NEXT: br label [[IFCONT:%.*]] +// CHECK2: else: +// CHECK2-NEXT: br label [[IFCONT]] +// CHECK2: ifcont: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK2-NEXT: br 
i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK2: then4: +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK2-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK2-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK2-NEXT: br label [[IFCONT6:%.*]] +// CHECK2: else5: +// CHECK2-NEXT: br label [[IFCONT6]] +// CHECK2: ifcont6: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK2-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK2: then8: +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK2-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK2-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK2-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK2-NEXT: br label [[IFCONT10:%.*]] +// CHECK2: else9: +// CHECK2-NEXT: br label [[IFCONT10]] +// CHECK2: ifcont10: +// CHECK2-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 
[[NVPTX_TID]], [[TMP22]] +// CHECK2-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK2: then12: +// CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK2-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK2-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK2-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK2-NEXT: br label [[IFCONT14:%.*]] +// CHECK2: else13: +// CHECK2-NEXT: br label [[IFCONT14]] +// CHECK2: ifcont14: +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// 
CHECK6-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 -// CHECK6-NEXT: br label [[PRECOND:%.*]] -// CHECK6: precond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 -// CHECK6-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] -// CHECK6: body: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK6: then4: -// CHECK6-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP16:%.*]] = load i8*, 
i8** [[TMP15]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* -// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] -// CHECK6-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 -// CHECK6-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 -// CHECK6-NEXT: br label [[IFCONT6:%.*]] -// CHECK6: else5: -// CHECK6-NEXT: br label [[IFCONT6]] -// CHECK6: ifcont6: -// CHECK6-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 -// CHECK6-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 -// CHECK6-NEXT: br label [[PRECOND]] -// CHECK6: exit: -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], 
[1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK6-NEXT: [[E:%.*]] = getelementptr 
inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 -// CHECK6-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 
0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// 
CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK2-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK2-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds 
[[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP16:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK2-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK2-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK2-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* -// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 -// CHECK6-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 -// CHECK6-NEXT: ret void +// CHECK2-LABEL: define 
{{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 +// CHECK2-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK2-NEXT: entry: +// CHECK2-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK2-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK2-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK2-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK2-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK2-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK2-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1024 x i32], [1024 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK2-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK2-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK2-NEXT: [[TMP10:%.*]] = getelementptr inbounds [1024 x i16], [1024 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK2-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK2-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// 
CHECK2-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK2-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker +// CHECK3-SAME: () #[[ATTR0:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK3: .await.work: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK3: .select.workers: +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK3: .execute.parallel: +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK3: .terminate.parallel: +// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK3: .barrier.parallel: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label 
[[DOTAWAIT_WORK]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* -// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20 +// CHECK3-SAME: (double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1:[0-9]+]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// 
CHECK3-NEXT: [[E7:%.*]] = alloca double, align 8 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK3-NEXT: [[TMP1:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK3-NEXT: br i1 [[TMP1]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK3: .worker: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l20_worker() #[[ATTR3:[0-9]+]] +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .mastercheck: +// CHECK3-NEXT: [[NVPTX_TID1:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS2]], 1 +// CHECK3-NEXT: [[TMP4:%.*]] = xor i32 [[TMP2]], -1 +// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP3]], [[TMP4]] +// CHECK3-NEXT: [[TMP5:%.*]] = icmp eq i32 [[NVPTX_TID1]], [[MASTER_TID]] +// CHECK3-NEXT: br i1 [[TMP5]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK3: .master: +// CHECK3-NEXT: [[NVPTX_NUM_THREADS4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: 
[[THREAD_LIMIT6:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS4]], [[NVPTX_WARP_SIZE5]] +// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT6]], i16 1) +// CHECK3-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP7:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK3-NEXT: store double [[TMP7]], double* [[E7]], align 8 +// CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], double* [[E7]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK3: .termination.notifier: +// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTEXIT]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker -// CHECK6-SAME: () #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: store i8* null, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: br label [[DOTAWAIT_WORK:%.*]] -// CHECK6: .await.work: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) -// CHECK6-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 -// CHECK6-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 -// CHECK6-NEXT: [[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 -// CHECK6-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null -// CHECK6-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] -// CHECK6: .select.workers: -// CHECK6-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 
-// CHECK6-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 -// CHECK6-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] -// CHECK6: .execute.parallel: -// CHECK6-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* -// CHECK6-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) -// CHECK6-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] -// CHECK6: .terminate.parallel: -// CHECK6-NEXT: call void @__kmpc_kernel_end_parallel() -// CHECK6-NEXT: br label [[DOTBARRIER_PARALLEL]] -// CHECK6: .barrier.parallel: -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTAWAIT_WORK]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__ +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], double* nonnull align 8 dereferenceable(8) [[E:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[E_ADDR:%.*]] = alloca double*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store double* [[E]], double** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load double*, double** [[E_ADDR]], align 4 +// CHECK3-NEXT: [[E1:%.*]] = call i8* @__kmpc_alloc_shared(i32 8) +// CHECK3-NEXT: [[E_ON_STACK:%.*]] = bitcast i8* [[E1]] to double* +// CHECK3-NEXT: store double 0.000000e+00, double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[TMP1:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[ADD:%.*]] = fadd double [[TMP1]], 5.000000e+00 
+// CHECK3-NEXT: store double [[ADD]], double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i32, i32* [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast double* [[E_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP3]], i8* [[TMP7]], i32 2048, i8* [[TMP6]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func) +// CHECK3-NEXT: [[TMP9:%.*]] = icmp eq i32 [[TMP8]], 1 +// CHECK3-NEXT: br i1 [[TMP9]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3: .omp.reduction.then: +// CHECK3-NEXT: [[TMP10:%.*]] = load double, double* [[TMP0]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load double, double* [[E_ON_STACK]], align 8 +// CHECK3-NEXT: [[ADD2:%.*]] = fadd double [[TMP10]], [[TMP11]] +// CHECK3-NEXT: store double [[ADD2]], double* [[TMP0]], align 8 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP3]]) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK3: .omp.reduction.done: +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[E1]]) +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define 
{{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29 -// CHECK6-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* -// CHECK6-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] -// CHECK6-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] -// CHECK6-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] -// CHECK6: .worker: -// CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l29_worker() #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .mastercheck: -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 -// CHECK6-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 -// CHECK6-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 -// CHECK6-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] -// CHECK6-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], 
[[MASTER_TID]] -// CHECK6-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] -// CHECK6: .master: -// CHECK6-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] -// CHECK6-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack() -// CHECK6-NEXT: [[TMP5:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* -// CHECK6-NEXT: [[TMP7:%.*]] = load i8, i8* [[CONV]], align 4 -// CHECK6-NEXT: [[C8:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: store i8 [[TMP7]], i8* [[C8]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load float, float* [[CONV1]], align 4 -// CHECK6-NEXT: [[D9:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: store float [[TMP8]], float* [[D9]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: store i32 [[TMP9]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D9]]) #[[ATTR3]] -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP5]]) -// CHECK6-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] -// CHECK6: .termination.notifier: -// CHECK6-NEXT: call void @__kmpc_kernel_deinit(i16 1) -// CHECK6-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) -// CHECK6-NEXT: br label [[DOTEXIT]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define 
{{[^@]+}}@_omp_reduction_shuffle_and_reduce_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca double, align 8 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [1 x i8*]* +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to double* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr double, double* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast double* [[TMP13]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast double* [[TMP12]] to i64* +// CHECK3-NEXT: [[TMP16:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i64* +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[TMP15]], align 8 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP18:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK3-NEXT: [[TMP19:%.*]] = call i64 @__kmpc_shuffle_int64(i64 [[TMP17]], i16 [[TMP7]], i16 [[TMP18]]) +// CHECK3-NEXT: store i64 [[TMP19]], i64* [[TMP16]], align 8 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr i64, i64* [[TMP15]], i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr i64, i64* [[TMP16]], i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = bitcast double* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP22]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP24:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP25:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP26:%.*]] = and i1 [[TMP24]], [[TMP25]] +// CHECK3-NEXT: [[TMP27:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP28:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP29:%.*]] = icmp eq i16 [[TMP28]], 0 +// CHECK3-NEXT: [[TMP30:%.*]] = and i1 [[TMP27]], [[TMP29]] +// CHECK3-NEXT: [[TMP31:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP32:%.*]] = and i1 [[TMP30]], [[TMP31]] +// CHECK3-NEXT: [[TMP33:%.*]] = or i1 [[TMP23]], [[TMP26]] +// CHECK3-NEXT: [[TMP34:%.*]] = or i1 [[TMP33]], [[TMP32]] +// CHECK3-NEXT: br i1 [[TMP34]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP35:%.*]] = bitcast [1 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP36:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP35]], i8* [[TMP36]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: [[TMP37:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP38:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP39:%.*]] = and i1 [[TMP37]], [[TMP38]] +// CHECK3-NEXT: br i1 [[TMP39]], label [[THEN4:%.*]], label 
[[ELSE5:%.*]] +// CHECK3: then4: +// CHECK3-NEXT: [[TMP40:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP40]], align 4 +// CHECK3-NEXT: [[TMP42:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP43:%.*]] = load i8*, i8** [[TMP42]], align 4 +// CHECK3-NEXT: [[TMP44:%.*]] = bitcast i8* [[TMP41]] to double* +// CHECK3-NEXT: [[TMP45:%.*]] = bitcast i8* [[TMP43]] to double* +// CHECK3-NEXT: [[TMP46:%.*]] = load double, double* [[TMP44]], align 8 +// CHECK3-NEXT: store double [[TMP46]], double* [[TMP45]], align 8 +// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3: else5: +// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3: ifcont6: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__1 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 -// CHECK6-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i8* @__kmpc_data_sharing_push_stack(i32 8, i16 1) -// CHECK6-NEXT: [[TMP3:%.*]] = bitcast 
i8* [[TMP2]] to %struct._globalized_locals_ty.2* -// CHECK6-NEXT: [[C1:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2:%.*]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 1 -// CHECK6-NEXT: [[D2:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_2]], %struct._globalized_locals_ty.2* [[TMP3]], i32 0, i32 0 -// CHECK6-NEXT: store i8 0, i8* [[C1]], align 4 -// CHECK6-NEXT: store float 1.000000e+00, float* [[D2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = sext i8 [[TMP4]] to i32 -// CHECK6-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 -// CHECK6-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 -// CHECK6-NEXT: store i8 [[CONV3]], i8* [[C1]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = load float, float* [[D2]], align 4 -// CHECK6-NEXT: [[MUL:%.*]] = fmul float [[TMP5]], 3.300000e+01 -// CHECK6-NEXT: store float [[MUL]], float* [[D2]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: store i8* [[C1]], i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[D2]] to i8* -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP7]], i8* [[TMP12]], i32 2048, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* 
@_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) -// CHECK6-NEXT: [[TMP14:%.*]] = icmp eq i32 [[TMP13]], 1 -// CHECK6-NEXT: br i1 [[TMP14]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK6: .omp.reduction.then: -// CHECK6-NEXT: [[TMP15:%.*]] = load i8, i8* [[TMP0]], align 1 -// CHECK6-NEXT: [[CONV4:%.*]] = sext i8 [[TMP15]] to i32 -// CHECK6-NEXT: [[TMP16:%.*]] = load i8, i8* [[C1]], align 4 -// CHECK6-NEXT: [[CONV5:%.*]] = sext i8 [[TMP16]] to i32 -// CHECK6-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] -// CHECK6-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 -// CHECK6-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 -// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[TMP1]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[D2]], align 4 -// CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 -// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK6: .omp.reduction.done: -// CHECK6-NEXT: call void @__kmpc_data_sharing_pop_stack(i8* [[TMP2]]) -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTCNT_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 
@llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK3-NEXT: store i32 0, i32* [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: br label [[PRECOND:%.*]] +// CHECK3: precond: +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = icmp ult i32 [[TMP5]], 2 +// CHECK3-NEXT: br i1 [[TMP6]], label [[BODY:%.*]], label [[EXIT:%.*]] +// CHECK3: body: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = load i8*, i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast i8* [[TMP8]] to i32* +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr i32, i32* [[TMP9]], i32 [[TMP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP12]], i32 addrspace(3)* [[TMP11]], align 4 +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: 
[[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP13]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3: then4: +// CHECK3-NEXT: [[TMP14:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP16:%.*]] = load i8*, i8** [[TMP15]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = bitcast i8* [[TMP16]] to i32* +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP17]], i32 [[TMP5]] +// CHECK3-NEXT: [[TMP19:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP14]], align 4 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[TMP18]], align 4 +// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3: else5: +// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3: ifcont6: +// CHECK3-NEXT: [[TMP20:%.*]] = add nsw i32 [[TMP5]], 1 +// CHECK3-NEXT: store i32 [[TMP20]], i32* [[DOTCNT_ADDR]], align 4 +// CHECK3-NEXT: br label [[PRECOND]] +// CHECK3: exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], 
align 2 -// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 -// CHECK6-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK6-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) -// CHECK6-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 -// CHECK6-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 -// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK6-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 4 -// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* -// CHECK6-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 
-// CHECK6-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* -// CHECK6-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* -// CHECK6-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* -// CHECK6-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK6-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 -// CHECK6-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 -// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 -// CHECK6-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK6-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 -// CHECK6-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] -// CHECK6-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK6-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 -// CHECK6-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 -// CHECK6-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] -// CHECK6-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK6-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] -// CHECK6-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] -// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] -// CHECK6-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP46:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] -// CHECK6-NEXT: br 
label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] -// CHECK6-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK6: then6: -// CHECK6-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 -// CHECK6-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 -// CHECK6-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 -// CHECK6-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 -// CHECK6-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 -// CHECK6-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 -// CHECK6-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* -// CHECK6-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* -// CHECK6-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 -// CHECK6-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 -// CHECK6-NEXT: br label [[IFCONT8:%.*]] -// CHECK6: else7: -// CHECK6-NEXT: br label [[IFCONT8]] -// CHECK6: ifcont8: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// 
CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP10]], align 8 +// CHECK3-NEXT: store double [[TMP12]], double* [[TMP11]], align 128 +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP9]], i8* [[TMP10]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq 
i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* -// CHECK6-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 -// CHECK6-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK6: then4: -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 -// CHECK6-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 -// CHECK6-NEXT: br label [[IFCONT6:%.*]] -// CHECK6: else5: -// CHECK6-NEXT: br label [[IFCONT6]] -// CHECK6: ifcont6: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// 
CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK6: then8: -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 -// CHECK6-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 -// CHECK6-NEXT: br label [[IFCONT10:%.*]] -// CHECK6: else9: -// CHECK6-NEXT: br label [[IFCONT10]] -// CHECK6: ifcont10: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK6: then12: -// CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 -// CHECK6-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* -// CHECK6-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 -// CHECK6-NEXT: br label [[IFCONT14:%.*]] -// CHECK6: else13: -// CHECK6-NEXT: br label [[IFCONT14]] -// CHECK6: ifcont14: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: 
entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [1 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to double* +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load double, double* [[TMP11]], align 128 +// CHECK3-NEXT: store double [[TMP12]], double* [[TMP10]], align 8 +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], 
align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 -// CHECK6-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 -// CHECK6-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [1 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 
4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty* +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [1 x i8*], [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[E:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY:%.*]], %struct._globalized_locals_ty* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x double], [2048 x double]* [[E]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast double* [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [1 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func"(i8* [[TMP10]], i8* [[TMP9]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 
-// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker +// CHECK3-SAME: () #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[WORK_FN:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[EXEC_STATUS:%.*]] = alloca i8, align 1 +// CHECK3-NEXT: store i8* null, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: store i8 0, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: br label [[DOTAWAIT_WORK:%.*]] +// CHECK3: .await.work: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: [[TMP0:%.*]] = call i1 @__kmpc_kernel_parallel(i8** [[WORK_FN]]) +// CHECK3-NEXT: [[TMP1:%.*]] = zext i1 [[TMP0]] to i8 +// CHECK3-NEXT: store i8 [[TMP1]], i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: 
[[TMP2:%.*]] = load i8*, i8** [[WORK_FN]], align 4 +// CHECK3-NEXT: [[SHOULD_TERMINATE:%.*]] = icmp eq i8* [[TMP2]], null +// CHECK3-NEXT: br i1 [[SHOULD_TERMINATE]], label [[DOTEXIT:%.*]], label [[DOTSELECT_WORKERS:%.*]] +// CHECK3: .select.workers: +// CHECK3-NEXT: [[TMP3:%.*]] = load i8, i8* [[EXEC_STATUS]], align 1 +// CHECK3-NEXT: [[IS_ACTIVE:%.*]] = icmp ne i8 [[TMP3]], 0 +// CHECK3-NEXT: br i1 [[IS_ACTIVE]], label [[DOTEXECUTE_PARALLEL:%.*]], label [[DOTBARRIER_PARALLEL:%.*]] +// CHECK3: .execute.parallel: +// CHECK3-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP2]] to void (i16, i32)* +// CHECK3-NEXT: call void [[TMP5]](i16 0, i32 [[TMP4]]) +// CHECK3-NEXT: br label [[DOTTERMINATE_PARALLEL:%.*]] +// CHECK3: .terminate.parallel: +// CHECK3-NEXT: call void @__kmpc_kernel_end_parallel() +// CHECK3-NEXT: br label [[DOTBARRIER_PARALLEL]] +// CHECK3: .barrier.parallel: +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTAWAIT_WORK]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26 +// CHECK3-SAME: (i32 [[C:%.*]], i32 [[D:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[C]], i32* [[C_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[D]], i32* [[D_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[C_ADDR]] to i8* +// CHECK3-NEXT: [[CONV1:%.*]] = bitcast i32* [[D_ADDR]] to float* +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: 
[[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS]], [[NVPTX_WARP_SIZE]] +// CHECK3-NEXT: [[TMP0:%.*]] = icmp ult i32 [[NVPTX_TID]], [[THREAD_LIMIT]] +// CHECK3-NEXT: br i1 [[TMP0]], label [[DOTWORKER:%.*]], label [[DOTMASTERCHECK:%.*]] +// CHECK3: .worker: +// CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l26_worker() #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .mastercheck: +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_NUM_THREADS3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE4:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP1:%.*]] = sub nuw i32 [[NVPTX_WARP_SIZE4]], 1 +// CHECK3-NEXT: [[TMP2:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS3]], 1 +// CHECK3-NEXT: [[TMP3:%.*]] = xor i32 [[TMP1]], -1 +// CHECK3-NEXT: [[MASTER_TID:%.*]] = and i32 [[TMP2]], [[TMP3]] +// CHECK3-NEXT: [[TMP4:%.*]] = icmp eq i32 [[NVPTX_TID2]], [[MASTER_TID]] +// CHECK3-NEXT: br i1 [[TMP4]], label [[DOTMASTER:%.*]], label [[DOTEXIT]] +// CHECK3: .master: +// CHECK3-NEXT: [[NVPTX_NUM_THREADS5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: [[NVPTX_WARP_SIZE6:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[THREAD_LIMIT7:%.*]] = sub nuw i32 [[NVPTX_NUM_THREADS5]], [[NVPTX_WARP_SIZE6]] +// CHECK3-NEXT: call void @__kmpc_kernel_init(i32 [[THREAD_LIMIT7]], i16 1) +// CHECK3-NEXT: [[TMP5:%.*]] = load i8, i8* [[CONV]], align 4 +// CHECK3-NEXT: [[C8:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// CHECK3-NEXT: store i8 [[TMP5]], i8* [[C8]], align 1 +// CHECK3-NEXT: [[TMP6:%.*]] = load float, float* [[CONV1]], align 4 +// CHECK3-NEXT: [[D9:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: 
[[D_ON_STACK:%.*]] = bitcast i8* [[D9]] to float* +// CHECK3-NEXT: store float [[TMP6]], float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__1(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i8* [[C8]], float* [[D_ON_STACK]]) #[[ATTR3]] +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D9]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C8]]) +// CHECK3-NEXT: br label [[DOTTERMINATION_NOTIFIER:%.*]] +// CHECK3: .termination.notifier: +// CHECK3-NEXT: call void @__kmpc_kernel_deinit(i16 1) +// CHECK3-NEXT: call void @__kmpc_barrier_simple_spmd(%struct.ident_t* null, i32 0) +// CHECK3-NEXT: br label [[DOTEXIT]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__1 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i8* nonnull align 1 dereferenceable(1) [[C:%.*]], float* nonnull align 4 dereferenceable(4) [[D:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[C_ADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[D_ADDR:%.*]] = alloca float*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i8* [[C]], i8** [[C_ADDR]], align 4 +// CHECK3-NEXT: store float* [[D]], float** [[D_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i8*, i8** [[C_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load float*, float** [[D_ADDR]], align 4 +// CHECK3-NEXT: [[C1:%.*]] = call i8* @__kmpc_alloc_shared(i32 1) +// 
CHECK3-NEXT: [[D2:%.*]] = call i8* @__kmpc_alloc_shared(i32 4) +// CHECK3-NEXT: [[D_ON_STACK:%.*]] = bitcast i8* [[D2]] to float* +// CHECK3-NEXT: store i8 0, i8* [[C1]], align 1 +// CHECK3-NEXT: store float 1.000000e+00, float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK3-NEXT: [[CONV:%.*]] = sext i8 [[TMP2]] to i32 +// CHECK3-NEXT: [[XOR:%.*]] = xor i32 [[CONV]], 2 +// CHECK3-NEXT: [[CONV3:%.*]] = trunc i32 [[XOR]] to i8 +// CHECK3-NEXT: store i8 [[CONV3]], i8* [[C1]], align 1 +// CHECK3-NEXT: [[TMP3:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[MUL:%.*]] = fmul float [[TMP3]], 3.300000e+01 +// CHECK3-NEXT: store float [[MUL]], float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: store i8* [[C1]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast float* [[D_ON_STACK]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB1]], i32 [[TMP5]], i8* [[TMP10]], i32 2048, i8* [[TMP9]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func3, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func4, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func5, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_reduce_func6, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func7, void 
(i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func8) +// CHECK3-NEXT: [[TMP12:%.*]] = icmp eq i32 [[TMP11]], 1 +// CHECK3-NEXT: br i1 [[TMP12]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3: .omp.reduction.then: +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP0]], align 1 +// CHECK3-NEXT: [[CONV4:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[C1]], align 1 +// CHECK3-NEXT: [[CONV5:%.*]] = sext i8 [[TMP14]] to i32 +// CHECK3-NEXT: [[XOR6:%.*]] = xor i32 [[CONV4]], [[CONV5]] +// CHECK3-NEXT: [[CONV7:%.*]] = trunc i32 [[XOR6]] to i8 +// CHECK3-NEXT: store i8 [[CONV7]], i8* [[TMP0]], align 1 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[TMP1]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[D_ON_STACK]], align 4 +// CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP16]] +// CHECK3-NEXT: store float [[MUL8]], float* [[TMP1]], align 4 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP5]]) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK3: .omp.reduction.done: +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[D2]]) +// CHECK3-NEXT: call void @__kmpc_free_shared(i8* [[C1]]) +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** 
[[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.3* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 -// CHECK6-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* -// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 -// CHECK6-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func3 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i8, align 1 +// 
CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca float, align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8, i8* [[TMP10]], align 1 +// CHECK3-NEXT: [[TMP14:%.*]] = sext i8 [[TMP13]] to i32 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP15:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK3-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP14]], i16 [[TMP7]], i16 [[TMP15]]) +// CHECK3-NEXT: [[TMP17:%.*]] = trunc i32 [[TMP16]] to i8 +// CHECK3-NEXT: store i8 [[TMP17]], i8* [[DOTOMP_REDUCTION_ELEMENT]], align 1 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i8, i8* [[TMP10]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i8, i8* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: store i8* [[DOTOMP_REDUCTION_ELEMENT]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 4 +// CHECK3-NEXT: 
[[TMP22:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP23:%.*]] = bitcast i8* [[TMP21]] to float* +// CHECK3-NEXT: [[TMP24:%.*]] = getelementptr float, float* [[TMP23]], i32 1 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast float* [[TMP24]] to i8* +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast float* [[TMP23]] to i32* +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i32* +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[TMP26]], align 4 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[TMP27]], align 4 +// CHECK3-NEXT: [[TMP31:%.*]] = getelementptr i32, i32* [[TMP26]], i32 1 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i32, i32* [[TMP27]], i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = bitcast float* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK3-NEXT: store i8* [[TMP33]], i8** [[TMP22]], align 4 +// CHECK3-NEXT: [[TMP34:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP37:%.*]] = and i1 [[TMP35]], [[TMP36]] +// CHECK3-NEXT: [[TMP38:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP39:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP40:%.*]] = icmp eq i16 [[TMP39]], 0 +// CHECK3-NEXT: [[TMP41:%.*]] = and i1 [[TMP38]], [[TMP40]] +// CHECK3-NEXT: [[TMP42:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP43:%.*]] = and i1 [[TMP41]], [[TMP42]] +// CHECK3-NEXT: [[TMP44:%.*]] = or i1 [[TMP34]], [[TMP37]] +// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP44]], [[TMP43]] +// CHECK3-NEXT: br i1 [[TMP45]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP46:%.*]] = 
bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP46]], i8* [[TMP47]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: [[TMP48:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP49:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP50:%.*]] = and i1 [[TMP48]], [[TMP49]] +// CHECK3-NEXT: br i1 [[TMP50]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK3: then6: +// CHECK3-NEXT: [[TMP51:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP52:%.*]] = load i8*, i8** [[TMP51]], align 4 +// CHECK3-NEXT: [[TMP53:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP54:%.*]] = load i8*, i8** [[TMP53]], align 4 +// CHECK3-NEXT: [[TMP55:%.*]] = load i8, i8* [[TMP52]], align 1 +// CHECK3-NEXT: store i8 [[TMP55]], i8* [[TMP54]], align 1 +// CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP57:%.*]] = load i8*, i8** [[TMP56]], align 4 +// CHECK3-NEXT: [[TMP58:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP59:%.*]] = load i8*, i8** [[TMP58]], align 4 +// CHECK3-NEXT: [[TMP60:%.*]] = bitcast i8* [[TMP57]] to float* +// CHECK3-NEXT: [[TMP61:%.*]] = bitcast i8* [[TMP59]] to float* +// CHECK3-NEXT: [[TMP62:%.*]] = load float, float* [[TMP60]], align 4 +// CHECK3-NEXT: store float [[TMP62]], float* [[TMP61]], align 4 +// CHECK3-NEXT: br label [[IFCONT8:%.*]] +// CHECK3: else7: +// CHECK3-NEXT: br label [[IFCONT8]] +// CHECK3: ifcont8: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 -// 
CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.3* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3:%.*]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_3]], %struct._globalized_locals_ty.3* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// 
CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func4 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32 addrspace(3)* [[TMP7]] to i8 addrspace(3)* +// CHECK3-NEXT: [[TMP9:%.*]] = load i8, i8* [[TMP6]], align 1 +// CHECK3-NEXT: store volatile i8 [[TMP9]], i8 addrspace(3)* [[TMP8]], align 1 +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* 
@[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3: then4: +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i32 addrspace(3)* [[TMP11]] to i8 addrspace(3)* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i8, i8 addrspace(3)* [[TMP12]], align 1 +// CHECK3-NEXT: store i8 [[TMP15]], i8* [[TMP14]], align 1 +// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3: else5: +// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3: ifcont6: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK3: then8: +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i32* +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP18]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP20]], i32 addrspace(3)* [[TMP19]], align 4 +// CHECK3-NEXT: br label [[IFCONT10:%.*]] +// CHECK3: else9: +// CHECK3-NEXT: br label [[IFCONT10]] +// CHECK3: ifcont10: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[TMP2]]) +// 
CHECK3-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP21]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK3: then12: +// CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to i32* +// CHECK3-NEXT: [[TMP26:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP22]], align 4 +// CHECK3-NEXT: store i32 [[TMP26]], i32* [[TMP25]], align 4 +// CHECK3-NEXT: br label [[IFCONT14:%.*]] +// CHECK3: else13: +// CHECK3-NEXT: br label [[IFCONT14]] +// CHECK3: ifcont14: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func5 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: 
[[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP9]], align 1 +// CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP10]], align 128 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP14]], align 4 +// CHECK3-NEXT: store float [[TMP16]], float* [[TMP15]], align 128 +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l36 -// CHECK6-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 -// CHECK6-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 -// CHECK6-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* -// CHECK6-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() -// CHECK6-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) -// CHECK6-NEXT: call void @__kmpc_data_sharing_init_stack_spmd() -// 
CHECK6-NEXT: br label [[DOTEXECUTE:%.*]] -// CHECK6: .execute: -// CHECK6-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 -// CHECK6-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[DOTOMP_DEINIT:%.*]] -// CHECK6: .omp.deinit: -// CHECK6-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) -// CHECK6-NEXT: br label [[DOTEXIT:%.*]] -// CHECK6: .exit: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func6 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], 
i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP11]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__9 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK6-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK6-NEXT: store i16 -32768, i16* [[B2]], 
align 2 -// CHECK6-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK6-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK6-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** -// CHECK6-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK6-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 2048, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* 
@_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) -// CHECK6-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 -// CHECK6-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK6: .omp.reduction.then: -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK6-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] -// CHECK6-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 -// CHECK6-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] -// CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK6: cond.true: -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK6-NEXT: br label [[COND_END:%.*]] -// CHECK6: cond.false: -// CHECK6-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK6-NEXT: br label [[COND_END]] -// CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] -// CHECK6-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 -// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) -// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK6: .omp.reduction.done: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func7 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// 
CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[TMP10]], align 128 +// CHECK3-NEXT: store i8 [[TMP11]], i8* [[TMP9]], align 1 +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to float* +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP15:%.*]] = getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[TMP15]], align 128 +// CHECK3-NEXT: store float [[TMP16]], float* [[TMP14]], align 4 +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@__omp_outlined__10 -// CHECK6-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) 
[[B:%.*]]) #[[ATTR1]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 -// CHECK6-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 -// CHECK6-NEXT: [[A1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[B2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 -// CHECK6-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 -// CHECK6-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 -// CHECK6-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 -// CHECK6-NEXT: store i32 0, i32* [[A1]], align 4 -// CHECK6-NEXT: store i16 -32768, i16* [[B2]], align 2 -// CHECK6-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK6-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 -// CHECK6-NEXT: store i32 [[OR]], i32* [[A1]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK6-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 -// CHECK6-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] -// CHECK6-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] -// CHECK6: cond.true: -// CHECK6-NEXT: br label [[COND_END:%.*]] -// CHECK6: cond.false: -// CHECK6-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK6-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 -// CHECK6-NEXT: br label [[COND_END]] -// CHECK6: cond.end: -// CHECK6-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] -// CHECK6-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 -// CHECK6-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 -// CHECK6-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* -// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) -// CHECK6-NEXT: [[TMP13:%.*]] = icmp eq i32 [[TMP12]], 1 -// CHECK6-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] -// CHECK6: .omp.reduction.then: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 -// CHECK6-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] -// CHECK6-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK6-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 -// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 -// CHECK6-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 -// CHECK6-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] -// CHECK6-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] -// CHECK6: cond.true9: -// CHECK6-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 -// CHECK6-NEXT: br label [[COND_END11:%.*]] -// CHECK6: cond.false10: -// CHECK6-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 -// 
CHECK6-NEXT: br label [[COND_END11]] -// CHECK6: cond.end11: -// CHECK6-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] -// CHECK6-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 -// CHECK6-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) -// CHECK6-NEXT: br label [[DOTOMP_REDUCTION_DONE]] -// CHECK6: .omp.reduction.done: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func8 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.0* +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0:%.*]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i8], [2048 x i8]* [[C]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: store i8* [[TMP7]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[D:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_0]], %struct._globalized_locals_ty.0* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [2048 x float], [2048 x float]* [[D]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast float* [[TMP9]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func2"(i8* [[TMP12]], i8* [[TMP11]]) #[[ATTR3]] +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z9ftemplateIcET_i_l33 +// CHECK3-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTZERO_ADDR:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTTHREADID_TEMP_:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: store i32 0, i32* [[DOTZERO_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[A]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[B]], i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: [[CONV:%.*]] = bitcast i32* [[B_ADDR]] to i16* +// CHECK3-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x() +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1) +// CHECK3-NEXT: br label [[DOTEXECUTE:%.*]] +// CHECK3: .execute: +// CHECK3-NEXT: [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP0]], i32* [[DOTTHREADID_TEMP_]], align 4 +// CHECK3-NEXT: call void @__omp_outlined__9(i32* [[DOTTHREADID_TEMP_]], i32* [[DOTZERO_ADDR]], i32* [[A_ADDR]], i16* [[CONV]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[DOTOMP_DEINIT:%.*]] +// CHECK3: .omp.deinit: +// CHECK3-NEXT: call void @__kmpc_spmd_kernel_deinit_v2(i16 1) +// CHECK3-NEXT: br label [[DOTEXIT:%.*]] +// CHECK3: .exit: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define 
{{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK6-NEXT: 
[[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK6-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK6-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK6-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK6-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK6-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK6-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 -// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK6-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP37:%.*]] = icmp ult i16 
[[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK6-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK6-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK6-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK6-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK6-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK6-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK6-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] -// CHECK6-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK6-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK6-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK6: then6: -// CHECK6-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK6-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK6-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK6-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK6-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK6-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK6-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x 
i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK6-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK6-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK6-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK6-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK6-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 -// CHECK6-NEXT: br label [[IFCONT8:%.*]] -// CHECK6: else7: -// CHECK6-NEXT: br label [[IFCONT8]] -// CHECK6: ifcont8: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__9 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[CAPTURED_VARS_ADDRS:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 +// CHECK3-NEXT: store i16 -32768, i16* 
[[B2]], align 2 +// CHECK3-NEXT: [[TMP2:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP3:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK3-NEXT: store i8* [[TMP3]], i8** [[TMP2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[CAPTURED_VARS_ADDRS]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK3-NEXT: store i8* [[TMP5]], i8** [[TMP4]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast [2 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8** +// CHECK3-NEXT: call void @__kmpc_parallel_51(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i32 1, i32 -1, i32 -1, i8* bitcast (void (i32*, i32*, i32*, i16*)* @__omp_outlined__10 to i8*), i8* null, i8** [[TMP8]], i32 2) +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK3-NEXT: store i8* [[TMP12]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** @"_openmp_teams_reductions_buffer_$_$ptr", align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_nvptx_teams_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP7]], i8* [[TMP14]], i32 2048, i8* [[TMP13]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func15, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func16, void (i8*, i32, i8*)* @_omp_reduction_list_to_global_copy_func17, void (i8*, i32, i8*)* 
@_omp_reduction_list_to_global_reduce_func18, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_copy_func19, void (i8*, i32, i8*)* @_omp_reduction_global_to_list_reduce_func20) +// CHECK3-NEXT: [[TMP16:%.*]] = icmp eq i32 [[TMP15]], 1 +// CHECK3-NEXT: br i1 [[TMP16]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3: .omp.reduction.then: +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP17]], [[TMP18]] +// CHECK3-NEXT: store i32 [[OR]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP19]] to i32 +// CHECK3-NEXT: [[TMP20:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP20]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 [[CONV]], [[CONV3]] +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP22:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i16 [ [[TMP21]], [[COND_TRUE]] ], [ [[TMP22]], [[COND_FALSE]] ] +// CHECK3-NEXT: store i16 [[COND]], i16* [[TMP1]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP7]]) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK3: .omp.reduction.done: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK6-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK6: then4: -// CHECK6-NEXT: [[TMP11:%.*]] = 
getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK6-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK6-NEXT: br label [[IFCONT6:%.*]] -// CHECK6: else5: -// CHECK6-NEXT: br label [[IFCONT6]] -// CHECK6: ifcont6: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK6: then8: -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK6-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK6-NEXT: br label [[IFCONT10:%.*]] -// CHECK6: else9: -// CHECK6-NEXT: br label [[IFCONT10]] -// CHECK6: ifcont10: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// 
CHECK6: then12: -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK6-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK6-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK6-NEXT: br label [[IFCONT14:%.*]] -// CHECK6: else13: -// CHECK6-NEXT: br label [[IFCONT14]] -// CHECK6: ifcont14: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@__omp_outlined__10 +// CHECK3-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[A:%.*]], i16* nonnull align 2 dereferenceable(2) [[B:%.*]]) #[[ATTR1]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[A_ADDR:%.*]] = alloca i32*, align 4 +// CHECK3-NEXT: [[B_ADDR:%.*]] = alloca i16*, align 4 +// CHECK3-NEXT: [[A1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[B2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 4 +// CHECK3-NEXT: store i32* [[A]], i32** [[A_ADDR]], align 4 +// CHECK3-NEXT: store i16* [[B]], i16** [[B_ADDR]], align 4 +// CHECK3-NEXT: [[TMP0:%.*]] = load i32*, i32** [[A_ADDR]], align 4 +// CHECK3-NEXT: [[TMP1:%.*]] = load i16*, i16** [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[A1]], align 4 +// 
CHECK3-NEXT: store i16 -32768, i16* [[B2]], align 2 +// CHECK3-NEXT: [[TMP2:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK3-NEXT: [[OR:%.*]] = or i32 [[TMP2]], 1 +// CHECK3-NEXT: store i32 [[OR]], i32* [[A1]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: [[CONV:%.*]] = sext i16 [[TMP3]] to i32 +// CHECK3-NEXT: [[CMP:%.*]] = icmp sgt i32 99, [[CONV]] +// CHECK3-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]] +// CHECK3: cond.true: +// CHECK3-NEXT: br label [[COND_END:%.*]] +// CHECK3: cond.false: +// CHECK3-NEXT: [[TMP4:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP4]] to i32 +// CHECK3-NEXT: br label [[COND_END]] +// CHECK3: cond.end: +// CHECK3-NEXT: [[COND:%.*]] = phi i32 [ 99, [[COND_TRUE]] ], [ [[CONV3]], [[COND_FALSE]] ] +// CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[COND]] to i16 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[B2]], align 2 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[A1]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP7]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i16* [[B2]] to i8* +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_nvptx_parallel_reduce_nowait_v2(%struct.ident_t* @[[GLOB3]], i32 [[TMP6]], i32 2, i32 8, i8* [[TMP11]], void (i8*, i16, i16, i16)* @_omp_reduction_shuffle_and_reduce_func12, void (i8*, i32)* @_omp_reduction_inter_warp_copy_func13) +// CHECK3-NEXT: [[TMP13:%.*]] = icmp 
eq i32 [[TMP12]], 1 +// CHECK3-NEXT: br i1 [[TMP13]], label [[DOTOMP_REDUCTION_THEN:%.*]], label [[DOTOMP_REDUCTION_DONE:%.*]] +// CHECK3: .omp.reduction.then: +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[A1]], align 4 +// CHECK3-NEXT: [[OR5:%.*]] = or i32 [[TMP14]], [[TMP15]] +// CHECK3-NEXT: store i32 [[OR5]], i32* [[TMP0]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: [[CONV6:%.*]] = sext i16 [[TMP16]] to i32 +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: [[CONV7:%.*]] = sext i16 [[TMP17]] to i32 +// CHECK3-NEXT: [[CMP8:%.*]] = icmp sgt i32 [[CONV6]], [[CONV7]] +// CHECK3-NEXT: br i1 [[CMP8]], label [[COND_TRUE9:%.*]], label [[COND_FALSE10:%.*]] +// CHECK3: cond.true9: +// CHECK3-NEXT: [[TMP18:%.*]] = load i16, i16* [[TMP1]], align 2 +// CHECK3-NEXT: br label [[COND_END11:%.*]] +// CHECK3: cond.false10: +// CHECK3-NEXT: [[TMP19:%.*]] = load i16, i16* [[B2]], align 2 +// CHECK3-NEXT: br label [[COND_END11]] +// CHECK3: cond.end11: +// CHECK3-NEXT: [[COND12:%.*]] = phi i16 [ [[TMP18]], [[COND_TRUE9]] ], [ [[TMP19]], [[COND_FALSE10]] ] +// CHECK3-NEXT: store i16 [[COND12]], i16* [[TMP1]], align 2 +// CHECK3-NEXT: call void @__kmpc_nvptx_end_reduce_nowait(i32 [[TMP6]]) +// CHECK3-NEXT: br label [[DOTOMP_REDUCTION_DONE]] +// CHECK3: .omp.reduction.done: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 -// 
CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 -// CHECK6-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 -// CHECK6-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) -// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 -// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* -// CHECK6-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 -// CHECK6-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* -// CHECK6-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 -// CHECK6-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 -// CHECK6-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() -// CHECK6-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 -// CHECK6-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) -// CHECK6-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 -// CHECK6-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 -// CHECK6-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 -// CHECK6-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 -// CHECK6-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* -// CHECK6-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 -// CHECK6-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 -// CHECK6-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] -// CHECK6-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 -// CHECK6-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 -// CHECK6-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 -// CHECK6-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] -// CHECK6-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 -// CHECK6-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] -// CHECK6-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] -// CHECK6-NEXT: [[TMP46:%.*]] = or 
i1 [[TMP45]], [[TMP44]] -// CHECK6-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* -// CHECK6-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 -// CHECK6-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] -// CHECK6-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] -// CHECK6-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] -// CHECK6: then6: -// CHECK6-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 -// CHECK6-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 -// CHECK6-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* -// CHECK6-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* -// CHECK6-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 -// CHECK6-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 -// CHECK6-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 -// CHECK6-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 -// CHECK6-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* -// CHECK6-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* -// CHECK6-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 -// CHECK6-NEXT: store i16 [[TMP65]], 
i16* [[TMP64]], align 2 -// CHECK6-NEXT: br label [[IFCONT8:%.*]] -// CHECK6: else7: -// CHECK6-NEXT: br label [[IFCONT8]] -// CHECK6: ifcont8: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func12 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: store i16 [[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[TMP12]], align 4 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i8* [[TMP22]] to i16* +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK3-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK3-NEXT: store i8* 
[[TMP34]], i8** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK3-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK3-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK3-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func11"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK3-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK3: then6: +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// 
CHECK3-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK3-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK3-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK3-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK3-NEXT: br label [[IFCONT8:%.*]] +// CHECK3: else7: +// CHECK3-NEXT: br label [[IFCONT8]] +// CHECK3: ifcont8: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func13 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 
[[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3: then4: +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3: else5: +// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3: ifcont6: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 
[[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK3: then8: +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK3-NEXT: br label [[IFCONT10:%.*]] +// CHECK3: else9: +// CHECK3-NEXT: br label [[IFCONT10]] +// CHECK3: ifcont10: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK3: then12: +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK3-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK3-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK3-NEXT: br label [[IFCONT14:%.*]] +// CHECK3: else13: +// CHECK3-NEXT: br label [[IFCONT14]] +// CHECK3: 
ifcont14: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 -// CHECK6-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() -// CHECK6-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] -// CHECK6: then: -// CHECK6-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 -// CHECK6-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 -// CHECK6-NEXT: br label [[IFCONT:%.*]] -// CHECK6: else: -// CHECK6-NEXT: br label [[IFCONT]] -// CHECK6: ifcont: -// CHECK6-NEXT: call void 
@__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] -// CHECK6: then4: -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* -// CHECK6-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 -// CHECK6-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 -// CHECK6-NEXT: br label [[IFCONT6:%.*]] -// CHECK6: else5: -// CHECK6-NEXT: br label [[IFCONT6]] -// CHECK6: ifcont6: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 -// CHECK6-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] -// CHECK6: then8: -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 -// CHECK6-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* -// CHECK6-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] -// CHECK6-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 -// CHECK6-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 -// CHECK6-NEXT: br label [[IFCONT10:%.*]] -// CHECK6: else9: -// CHECK6-NEXT: br label [[IFCONT10]] -// 
CHECK6: ifcont10: -// CHECK6-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) -// CHECK6-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] -// CHECK6-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] -// CHECK6: then12: -// CHECK6-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] -// CHECK6-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* -// CHECK6-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 -// CHECK6-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* -// CHECK6-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 -// CHECK6-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 -// CHECK6-NEXT: br label [[IFCONT14:%.*]] -// CHECK6: else13: -// CHECK6-NEXT: br label [[IFCONT14]] -// CHECK6: ifcont14: -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_shuffle_and_reduce_func15 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i16 signext [[TMP1:%.*]], i16 signext [[TMP2:%.*]], i16 signext [[TMP3:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTADDR3:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_ELEMENT4:%.*]] = alloca i16, align 2 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i16 [[TMP1]], i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: store i16 
[[TMP2]], i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: store i16 [[TMP3]], i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP4:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = bitcast i8* [[TMP4]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP6:%.*]] = load i16, i16* [[DOTADDR1]], align 2 +// CHECK3-NEXT: [[TMP7:%.*]] = load i16, i16* [[DOTADDR2]], align 2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[DOTADDR3]], align 2 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP10:%.*]] = load i8*, i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast i8* [[TMP10]] to i32* +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP13]] to i8* +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[TMP12]], align 4 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP16:%.*]] = trunc i32 [[NVPTX_WARP_SIZE]] to i16 +// CHECK3-NEXT: [[TMP17:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP15]], i16 [[TMP7]], i16 [[TMP16]]) +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_REDUCTION_ELEMENT]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = getelementptr i32, i32* [[TMP12]], i32 1 +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr i32, i32* [[DOTOMP_REDUCTION_ELEMENT]], i32 1 +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32* [[DOTOMP_REDUCTION_ELEMENT]] to i8* +// CHECK3-NEXT: store i8* [[TMP20]], i8** [[TMP11]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP21]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP24:%.*]] = 
bitcast i8* [[TMP22]] to i16* +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = bitcast i16* [[TMP25]] to i8* +// CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP24]], align 2 +// CHECK3-NEXT: [[TMP28:%.*]] = sext i16 [[TMP27]] to i32 +// CHECK3-NEXT: [[NVPTX_WARP_SIZE5:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.warpsize() +// CHECK3-NEXT: [[TMP29:%.*]] = trunc i32 [[NVPTX_WARP_SIZE5]] to i16 +// CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_shuffle_int32(i32 [[TMP28]], i16 [[TMP7]], i16 [[TMP29]]) +// CHECK3-NEXT: [[TMP31:%.*]] = trunc i32 [[TMP30]] to i16 +// CHECK3-NEXT: store i16 [[TMP31]], i16* [[DOTOMP_REDUCTION_ELEMENT4]], align 2 +// CHECK3-NEXT: [[TMP32:%.*]] = getelementptr i16, i16* [[TMP24]], i32 1 +// CHECK3-NEXT: [[TMP33:%.*]] = getelementptr i16, i16* [[DOTOMP_REDUCTION_ELEMENT4]], i32 1 +// CHECK3-NEXT: [[TMP34:%.*]] = bitcast i16* [[DOTOMP_REDUCTION_ELEMENT4]] to i8* +// CHECK3-NEXT: store i8* [[TMP34]], i8** [[TMP23]], align 4 +// CHECK3-NEXT: [[TMP35:%.*]] = icmp eq i16 [[TMP8]], 0 +// CHECK3-NEXT: [[TMP36:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP37:%.*]] = icmp ult i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP38:%.*]] = and i1 [[TMP36]], [[TMP37]] +// CHECK3-NEXT: [[TMP39:%.*]] = icmp eq i16 [[TMP8]], 2 +// CHECK3-NEXT: [[TMP40:%.*]] = and i16 [[TMP6]], 1 +// CHECK3-NEXT: [[TMP41:%.*]] = icmp eq i16 [[TMP40]], 0 +// CHECK3-NEXT: [[TMP42:%.*]] = and i1 [[TMP39]], [[TMP41]] +// CHECK3-NEXT: [[TMP43:%.*]] = icmp sgt i16 [[TMP7]], 0 +// CHECK3-NEXT: [[TMP44:%.*]] = and i1 [[TMP42]], [[TMP43]] +// CHECK3-NEXT: [[TMP45:%.*]] = or i1 [[TMP35]], [[TMP38]] +// CHECK3-NEXT: [[TMP46:%.*]] = or i1 [[TMP45]], [[TMP44]] +// CHECK3-NEXT: br i1 [[TMP46]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP47:%.*]] = bitcast [2 x i8*]* [[TMP5]] to i8* +// CHECK3-NEXT: [[TMP48:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]] to i8* +// CHECK3-NEXT: 
call void @"_omp$reduction$reduction_func14"(i8* [[TMP47]], i8* [[TMP48]]) #[[ATTR3]] +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: [[TMP49:%.*]] = icmp eq i16 [[TMP8]], 1 +// CHECK3-NEXT: [[TMP50:%.*]] = icmp uge i16 [[TMP6]], [[TMP7]] +// CHECK3-NEXT: [[TMP51:%.*]] = and i1 [[TMP49]], [[TMP50]] +// CHECK3-NEXT: br i1 [[TMP51]], label [[THEN6:%.*]], label [[ELSE7:%.*]] +// CHECK3: then6: +// CHECK3-NEXT: [[TMP52:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP53:%.*]] = load i8*, i8** [[TMP52]], align 4 +// CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP55:%.*]] = load i8*, i8** [[TMP54]], align 4 +// CHECK3-NEXT: [[TMP56:%.*]] = bitcast i8* [[TMP53]] to i32* +// CHECK3-NEXT: [[TMP57:%.*]] = bitcast i8* [[TMP55]] to i32* +// CHECK3-NEXT: [[TMP58:%.*]] = load i32, i32* [[TMP56]], align 4 +// CHECK3-NEXT: store i32 [[TMP58]], i32* [[TMP57]], align 4 +// CHECK3-NEXT: [[TMP59:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_REMOTE_REDUCE_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP60:%.*]] = load i8*, i8** [[TMP59]], align 4 +// CHECK3-NEXT: [[TMP61:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP5]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP62:%.*]] = load i8*, i8** [[TMP61]], align 4 +// CHECK3-NEXT: [[TMP63:%.*]] = bitcast i8* [[TMP60]] to i16* +// CHECK3-NEXT: [[TMP64:%.*]] = bitcast i8* [[TMP62]] to i16* +// CHECK3-NEXT: [[TMP65:%.*]] = load i16, i16* [[TMP63]], align 2 +// CHECK3-NEXT: store i16 [[TMP65]], i16* [[TMP64]], align 2 +// CHECK3-NEXT: br label [[IFCONT8:%.*]] +// CHECK3: else7: +// CHECK3-NEXT: br label [[IFCONT8]] +// CHECK3: ifcont8: +// CHECK3-NEXT: ret void +// +// +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_inter_warp_copy_func16 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 
[[TMP1:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[TMP2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3]]) +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[NVPTX_TID:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_TID2:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_LANE_ID:%.*]] = and i32 [[NVPTX_TID2]], 31 +// CHECK3-NEXT: [[NVPTX_TID3:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.tid.x() +// CHECK3-NEXT: [[NVPTX_WARP_ID:%.*]] = ashr i32 [[NVPTX_TID3]], 5 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER]], label [[THEN:%.*]], label [[ELSE:%.*]] +// CHECK3: then: +// CHECK3-NEXT: [[TMP5:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP6:%.*]] = load i8*, i8** [[TMP5]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = bitcast i8* [[TMP6]] to i32* +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[TMP7]], align 4 +// CHECK3-NEXT: store volatile i32 [[TMP9]], i32 addrspace(3)* [[TMP8]], align 4 +// CHECK3-NEXT: br label [[IFCONT:%.*]] +// CHECK3: else: +// CHECK3-NEXT: br label [[IFCONT]] +// CHECK3: ifcont: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: 
[[IS_ACTIVE_THREAD:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP10]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD]], label [[THEN4:%.*]], label [[ELSE5:%.*]] +// CHECK3: then4: +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP12:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[TMP12]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = bitcast i8* [[TMP13]] to i32* +// CHECK3-NEXT: [[TMP15:%.*]] = load volatile i32, i32 addrspace(3)* [[TMP11]], align 4 +// CHECK3-NEXT: store i32 [[TMP15]], i32* [[TMP14]], align 4 +// CHECK3-NEXT: br label [[IFCONT6:%.*]] +// CHECK3: else5: +// CHECK3-NEXT: br label [[IFCONT6]] +// CHECK3: ifcont6: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[WARP_MASTER7:%.*]] = icmp eq i32 [[NVPTX_LANE_ID]], 0 +// CHECK3-NEXT: br i1 [[WARP_MASTER7]], label [[THEN8:%.*]], label [[ELSE9:%.*]] +// CHECK3: then8: +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8*, i8** [[TMP16]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = bitcast i8* [[TMP17]] to i16* +// CHECK3-NEXT: [[TMP19:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_WARP_ID]] +// CHECK3-NEXT: [[TMP20:%.*]] = bitcast i32 addrspace(3)* [[TMP19]] to i16 addrspace(3)* +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[TMP18]], align 2 +// CHECK3-NEXT: store volatile i16 [[TMP21]], i16 addrspace(3)* [[TMP20]], align 2 +// CHECK3-NEXT: br label [[IFCONT10:%.*]] +// CHECK3: else9: +// CHECK3-NEXT: br label [[IFCONT10]] +// CHECK3: ifcont10: +// CHECK3-NEXT: call void @__kmpc_barrier(%struct.ident_t* @[[GLOB4]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTADDR1]], align 4 +// CHECK3-NEXT: [[IS_ACTIVE_THREAD11:%.*]] = icmp ult i32 [[NVPTX_TID]], [[TMP22]] +// CHECK3-NEXT: br i1 [[IS_ACTIVE_THREAD11]], label [[THEN12:%.*]], label [[ELSE13:%.*]] +// CHECK3: then12: +// CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @__openmp_nvptx_data_transfer_temporary_storage, i64 0, i32 [[NVPTX_TID]] +// CHECK3-NEXT: [[TMP24:%.*]] = bitcast i32 addrspace(3)* [[TMP23]] to i16 addrspace(3)* +// CHECK3-NEXT: [[TMP25:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8*, i8** [[TMP25]], align 4 +// CHECK3-NEXT: [[TMP27:%.*]] = bitcast i8* [[TMP26]] to i16* +// CHECK3-NEXT: [[TMP28:%.*]] = load volatile i16, i16 addrspace(3)* [[TMP24]], align 2 +// CHECK3-NEXT: store i16 [[TMP28]], i16* [[TMP27]], align 2 +// CHECK3-NEXT: br label [[IFCONT14:%.*]] +// CHECK3: else13: +// CHECK3-NEXT: br label [[IFCONT14]] +// CHECK3: ifcont14: +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* 
[[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 -// CHECK6-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 -// CHECK6-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_copy_func17 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to 
%struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP10]], align 4 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP11]], align 128 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP15]], align 2 +// CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP16]], align 128 +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// 
CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_list_to_global_reduce_func18 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], 
align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP12]], i8* [[TMP13]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca 
i8*, align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* -// CHECK6-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.4* -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 -// CHECK6-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 -// CHECK6-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 -// CHECK6-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* -// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP6]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] -// CHECK6-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 -// CHECK6-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_copy_func19 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// 
CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to [2 x i8*]* +// CHECK3-NEXT: [[TMP5:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = bitcast i8* [[TMP5]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP9:%.*]] = load i8*, i8** [[TMP8]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = bitcast i8* [[TMP9]] to i32* +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 128 +// CHECK3-NEXT: store i32 [[TMP12]], i32* [[TMP10]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[TMP13]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = bitcast i8* [[TMP14]] to i16* +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP6]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP16:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP7]] +// CHECK3-NEXT: [[TMP17:%.*]] = load i16, i16* [[TMP16]], align 128 +// CHECK3-NEXT: store i16 [[TMP17]], i16* [[TMP15]], align 2 +// CHECK3-NEXT: ret void 
// // -// CHECK6-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 -// CHECK6-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { -// CHECK6-NEXT: entry: -// CHECK6-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 -// CHECK6-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 -// CHECK6-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 -// CHECK6-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 -// CHECK6-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 -// CHECK6-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.4* -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 -// CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4:%.*]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* -// CHECK6-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 -// CHECK6-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_4]], %struct._globalized_locals_ty.4* [[TMP4]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] -// CHECK6-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* -// CHECK6-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 4 -// CHECK6-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* -// CHECK6-NEXT: [[TMP13:%.*]] = load i8*, i8** 
[[DOTADDR2]], align 4 -// CHECK6-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] -// CHECK6-NEXT: ret void +// CHECK3-LABEL: define {{[^@]+}}@_omp_reduction_global_to_list_reduce_func20 +// CHECK3-SAME: (i8* [[TMP0:%.*]], i32 [[TMP1:%.*]], i8* [[TMP2:%.*]]) #[[ATTR0]] { +// CHECK3-NEXT: entry: +// CHECK3-NEXT: [[DOTADDR:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTADDR1:%.*]] = alloca i32, align 4 +// CHECK3-NEXT: [[DOTADDR2:%.*]] = alloca i8*, align 4 +// CHECK3-NEXT: [[DOTOMP_REDUCTION_RED_LIST:%.*]] = alloca [2 x i8*], align 4 +// CHECK3-NEXT: store i8* [[TMP0]], i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: store i32 [[TMP1]], i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: store i8* [[TMP2]], i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: [[TMP3:%.*]] = load i8*, i8** [[DOTADDR]], align 4 +// CHECK3-NEXT: [[TMP4:%.*]] = bitcast i8* [[TMP3]] to %struct._globalized_locals_ty.1* +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTADDR1]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 0 +// CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1:%.*]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 0 +// CHECK3-NEXT: [[TMP7:%.*]] = getelementptr inbounds [2048 x i32], [2048 x i32]* [[A]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP8:%.*]] = bitcast i32* [[TMP7]] to i8* +// CHECK3-NEXT: store i8* [[TMP8]], i8** [[TMP6]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [2 x i8*], [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]], i32 0, i32 1 +// CHECK3-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT__GLOBALIZED_LOCALS_TY_1]], %struct._globalized_locals_ty.1* [[TMP4]], i32 0, i32 1 +// CHECK3-NEXT: [[TMP10:%.*]] = getelementptr inbounds [2048 x i16], [2048 x i16]* [[B]], i32 0, i32 [[TMP5]] +// CHECK3-NEXT: [[TMP11:%.*]] = bitcast i16* [[TMP10]] to i8* +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[TMP9]], align 
4 +// CHECK3-NEXT: [[TMP12:%.*]] = bitcast [2 x i8*]* [[DOTOMP_REDUCTION_RED_LIST]] to i8* +// CHECK3-NEXT: [[TMP13:%.*]] = load i8*, i8** [[DOTADDR2]], align 4 +// CHECK3-NEXT: call void @"_omp$reduction$reduction_func14"(i8* [[TMP13]], i8* [[TMP12]]) #[[ATTR3]] +// CHECK3-NEXT: ret void // diff --git a/clang/test/OpenMP/ordered_codegen.cpp b/clang/test/OpenMP/ordered_codegen.cpp index b49717f6cae5d..85c668ccfc591 100644 --- a/clang/test/OpenMP/ordered_codegen.cpp +++ b/clang/test/OpenMP/ordered_codegen.cpp @@ -560,30 +560,30 @@ void foo_simd(int low, int up) { // CHECK1-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK1-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, 
!llvm.access.group !2 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !2 +// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK1-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -633,31 +633,31 @@ void foo_simd(int low, int up) { // CHECK1-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] // CHECK1: omp.inner.for.cond29: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 -// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// 
CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK1-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK1-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK1: omp.inner.for.body32: -// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK1-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK1-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4 -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK1-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK1-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] -// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4 -// CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]) +// CHECK1-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // CHECK1: omp.body.continue37: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK1: omp.inner.for.inc38: -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* 
[[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK1-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4 -// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK1: omp.inner.for.end40: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1157,30 +1157,30 @@ void foo_simd(int low, int up) { // CHECK2-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK2-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[MUL:%.*]] = mul 
i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]] -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !2 +// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK2-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1230,31 +1230,31 @@ void foo_simd(int low, int up) { // CHECK2-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND29:%.*]] // CHECK2: omp.inner.for.cond29: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 -// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK2-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK2-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK2: omp.inner.for.body32: -// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK2-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK2-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4 -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK2-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK2-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] -// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4 -// CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]) +// CHECK2-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // CHECK2: omp.body.continue37: // 
CHECK2-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK2: omp.inner.for.inc38: -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK2-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4 -// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK2: omp.inner.for.end40: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1754,30 +1754,30 @@ void foo_simd(int low, int up) { // CHECK3-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 // CHECK3-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK3-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK3-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !2 +// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK3-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // 
CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -1827,31 +1827,31 @@ void foo_simd(int low, int up) { // CHECK3-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] // CHECK3: omp.inner.for.cond29: -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK3-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK3-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK3: omp.inner.for.body32: -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK3-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK3-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK3-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK3-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM35]] -// CHECK3-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4 -// CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]) +// CHECK3-NEXT: store float 0.000000e+00, float* 
[[ARRAYIDX36]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // CHECK3: omp.body.continue37: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK3: omp.inner.for.inc38: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4 -// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end40: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2351,30 +2351,30 @@ void foo_simd(int low, int up) { // CHECK4-NEXT: store i32 0, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !2 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !3 // CHECK4-NEXT: [[ADD6:%.*]] = add i32 [[TMP9]], 1 // CHECK4-NEXT: [[CMP7:%.*]] = icmp ult i32 [[TMP8]], [[ADD6]] // CHECK4-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !2 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !3 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK4-NEXT: [[MUL:%.*]] = mul i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD8:%.*]] = add i32 [[TMP10]], [[MUL]] -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !2 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !2 +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[I5]], align 4, !llvm.access.group !3 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !3 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* @f, i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !2 -// CHECK4-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !2 +// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX]], align 4, !llvm.access.group !3 +// CHECK4-NEXT: call void @__captured_stmt(i32* [[I5]]), !llvm.access.group !3 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 // CHECK4-NEXT: [[ADD9:%.*]] = add i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !2 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !3 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 // CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_1]], align 4 @@ -2424,31 +2424,31 @@ void foo_simd(int low, int up) { // CHECK4-NEXT: store i32 [[TMP27]], i32* [[DOTOMP_IV16]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29:%.*]] // CHECK4: omp.inner.for.cond29: -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD30:%.*]] = add i32 [[TMP29]], 1 // CHECK4-NEXT: [[CMP31:%.*]] = icmp ult i32 [[TMP28]], [[ADD30]] // CHECK4-NEXT: br i1 [[CMP31]], label [[OMP_INNER_FOR_BODY32:%.*]], label [[OMP_INNER_FOR_END40:%.*]] // CHECK4: omp.inner.for.body32: -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_18]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL33:%.*]] = mul i32 [[TMP31]], 1 // CHECK4-NEXT: [[ADD34:%.*]] = add i32 [[TMP30]], [[MUL33]] -// CHECK4-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4 +// CHECK4-NEXT: store i32 [[ADD34]], i32* [[I28]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[I28]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[IDXPROM35:%.*]] = sext i32 [[TMP32]] to i64 // CHECK4-NEXT: [[ARRAYIDX36:%.*]] = getelementptr inbounds [10 x float], [10 x float]* 
@f, i64 0, i64 [[IDXPROM35]] -// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4 -// CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]) +// CHECK4-NEXT: store float 0.000000e+00, float* [[ARRAYIDX36]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void @__captured_stmt.1(i32* [[I28]]), !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE37:%.*]] // CHECK4: omp.body.continue37: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC38:%.*]] // CHECK4: omp.inner.for.inc38: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD39:%.*]] = add i32 [[TMP33]], 1 -// CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4 -// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]) -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD39]], i32* [[DOTOMP_IV16]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: call void @__kmpc_dispatch_fini_4u(%struct.ident_t* @[[GLOB1]], i32 [[TMP0]]), !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND29]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end40: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: diff --git a/clang/test/OpenMP/parallel_for_codegen.cpp b/clang/test/OpenMP/parallel_for_codegen.cpp index 7522d118f0a6f..3550e76f21c4b 100644 --- a/clang/test/OpenMP/parallel_for_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_codegen.cpp @@ -769,42 +769,42 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, 
!llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* 
[[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // 
CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -871,42 +871,42 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK1-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK1-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP11:%.*]] = load float*, float** 
[[TMP1]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* 
[[I]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK1-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1010,58 +1010,58 @@ void range_for_collapsed() { // CHECK1-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // 
CHECK1-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 // CHECK1-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 // CHECK1-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK1-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 -// CHECK1-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 // CHECK1-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 // CHECK1-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] // CHECK1-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK1-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK1-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 -// CHECK1-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK1-NEXT: store 
i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 // CHECK1-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK1-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 // CHECK1-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, 
!llvm.access.group !10 +// CHECK1-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 // CHECK1-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK1-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK1-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 // CHECK1-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] -// CHECK1-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK1-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 -// CHECK1-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1134,55 +1134,55 @@ void range_for_collapsed() { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK1-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK1-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 // CHECK1-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] // CHECK1-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, 
!llvm.access.group !13 +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* 
[[ARRAYIDX10]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !13 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK1-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 // CHECK1-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] -// CHECK1-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK1-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1883,42 +1883,42 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// 
CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// 
CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = 
load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1985,42 +1985,42 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK2-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK2-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, 
!llvm.access.group !7 +// CHECK2-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL5:%.*]] = fmul float 
[[MUL3]], [[TMP19]] -// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK2-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2124,58 +2124,58 @@ void range_for_collapsed() { // CHECK2-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// 
CHECK2-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK2-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 // CHECK2-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 // CHECK2-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK2-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 -// CHECK2-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 // CHECK2-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 // CHECK2-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] // CHECK2-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK2-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK2-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 -// CHECK2-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP23:%.*]] = load float*, 
float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK2-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 // CHECK2-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK2-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 // CHECK2-NEXT: 
[[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 // CHECK2-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK2-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK2-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 // CHECK2-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] -// CHECK2-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK2-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 -// CHECK2-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2248,55 +2248,55 @@ void range_for_collapsed() { // CHECK2-NEXT: 
store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK2-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK2-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 // CHECK2-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] // CHECK2-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group 
!13 -// CHECK2-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] 
to i64 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !13 -// CHECK2-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK2-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 // CHECK2-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] -// CHECK2-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK2-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2997,42 +2997,42 @@ void range_for_collapsed() { 
// CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK3-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP13:%.*]] = load float, 
float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 // CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !4 +// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK3-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3099,42 +3099,42 @@ void range_for_collapsed() { // CHECK3-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK3-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK3-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, 
!llvm.access.group !7 -// CHECK3-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK3-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* 
[[ARRAYIDX4]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3238,58 +3238,58 @@ void range_for_collapsed() { // CHECK3-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// 
CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 // CHECK3-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 // CHECK3-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK3-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK3-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 // CHECK3-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK3-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 -// CHECK3-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK3-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK3-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 // CHECK3-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 // CHECK3-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] // CHECK3-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK3-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // 
CHECK3-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 -// CHECK3-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK3-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 // CHECK3-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK3-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, 
!llvm.access.group !11 +// CHECK3-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK3-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 // CHECK3-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK3-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK3-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK3-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 // CHECK3-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] -// CHECK3-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK3-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 -// CHECK3-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3362,55 +3362,55 @@ void range_for_collapsed() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 // CHECK3-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], 
[[MUL3]] // CHECK3-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK3-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK3-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK3-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP19:%.*]] = 
load float*, float** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK3-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK3-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 // CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] -// CHECK3-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -4111,42 +4111,42 @@ void range_for_collapsed() { // CHECK4-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK4-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK4-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !4 -// 
CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 // CHECK4-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// 
CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !4 +// CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4213,42 +4213,42 @@ void range_for_collapsed() { // CHECK4-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK4-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 
8, !llvm.access.group !8 // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK4-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK4-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], 
align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4352,58 +4352,58 @@ void range_for_collapsed() { // CHECK4-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // 
CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 // CHECK4-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK4-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 // CHECK4-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK4-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK4-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 // CHECK4-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK4-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 -// CHECK4-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK4-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK4-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 // CHECK4-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 // 
CHECK4-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] // CHECK4-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1 // CHECK4-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK4-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 -// CHECK4-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK4-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK4-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 // CHECK4-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK4-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK4-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, 
!llvm.access.group !10 -// CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK4-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 // CHECK4-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK4-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK4-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK4-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 // CHECK4-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] -// CHECK4-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK4-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 -// CHECK4-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4476,55 +4476,55 @@ void range_for_collapsed() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !14 // CHECK4-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 // CHECK4-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] // CHECK4-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK4-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK4-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK4-NEXT: [[TMP19:%.*]] 
= load float*, float** [[TMP3]], align 8, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK4-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 // CHECK4-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK4-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK4-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 // CHECK4-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] -// CHECK4-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4697,25 +4697,25 @@ void range_for_collapsed() { // // // CHECK5-LABEL: define {{[^@]+}}@_Z17with_var_schedulev -// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG6:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR0:[0-9]+]] !dbg [[DBG7:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A:%.*]] = alloca double, align 8 // CHECK5-NEXT: [[DOTCAPTURE_EXPR_:%.*]] = alloca i8, align 1 // CHECK5-NEXT: [[DOTCAPTURE_EXPR__CASTED:%.*]] = alloca i64, align 8 -// CHECK5-NEXT: store double 5.000000e+00, double* [[A]], align 8, !dbg [[DBG9:![0-9]+]] -// CHECK5-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8, !dbg [[DBG10:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8, !dbg [[DBG10]] -// CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG10]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG10]] -// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*, !dbg [[DBG10]] -// CHECK5-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1, !dbg [[DBG10]] -// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !dbg [[DBG10]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. 
to void (i32*, i32*, ...)*), i64 [[TMP2]]), !dbg [[DBG10]] -// CHECK5-NEXT: ret void, !dbg [[DBG11:![0-9]+]] +// CHECK5-NEXT: store double 5.000000e+00, double* [[A]], align 8, !dbg [[DBG10:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = load double, double* [[A]], align 8, !dbg [[DBG11:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = fptosi double [[TMP0]] to i8, !dbg [[DBG11]] +// CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: [[CONV1:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8*, !dbg [[DBG11]] +// CHECK5-NEXT: store i8 [[TMP1]], i8* [[CONV1]], align 1, !dbg [[DBG11]] +// CHECK5-NEXT: [[TMP2:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !dbg [[DBG11]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB4:[0-9]+]], i32 1, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64)* @.omp_outlined. to void (i32*, i32*, ...)*), i64 [[TMP2]]), !dbg [[DBG11]] +// CHECK5-NEXT: ret void, !dbg [[DBG12:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined. 
-// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG12:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i64 [[DOTCAPTURE_EXPR_:%.*]]) #[[ATTR1:[0-9]+]] !dbg [[DBG13:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4734,97 +4734,97 @@ void range_for_collapsed() { // CHECK5-NEXT: store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8 // CHECK5-NEXT: store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8 // CHECK5-NEXT: store i64 [[DOTCAPTURE_EXPR_]], i64* [[DOTCAPTURE_EXPR__ADDR]], align 8 -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG13:![0-9]+]] -// CHECK5-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8, !dbg [[DBG14:![0-9]+]] -// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]], !dbg [[DBG14]] -// CHECK5-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG14]] -// CHECK5-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG14]] -// CHECK5-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 1, i64* [[I]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label 
[[OMP_PRECOND_END:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__ADDR]] to i8*, !dbg [[DBG14:![0-9]+]] +// CHECK5-NEXT: [[TMP0:%.*]] = load double, double* undef, align 8, !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: [[ADD:%.*]] = fadd double 2.000000e+00, [[TMP0]], !dbg [[DBG15]] +// CHECK5-NEXT: store double [[ADD]], double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP1:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB:%.*]] = fsub double [[TMP1]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[DIV:%.*]] = fdiv double [[SUB]], 1.000000e+00, !dbg [[DBG15]] +// CHECK5-NEXT: [[CONV3:%.*]] = fptoui double [[DIV]] to i64, !dbg [[DBG15]] +// CHECK5-NEXT: [[SUB4:%.*]] = sub i64 [[CONV3]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[SUB4]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, i64* [[I]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP2:%.*]] = load double, double* [[DOTCAPTURE_EXPR_1]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP:%.*]] = fcmp olt double 1.000000e+00, [[TMP2]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.precond.then: -// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8, !dbg [[DBG13]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG13]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG13]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[TMP5]], align 4, !dbg [[DBG13]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]), !dbg [[DBG13]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP3]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i8, i8* [[CONV]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP4]] to i64, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[TMP5]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_8u(%struct.ident_t* @[[GLOB1:[0-9]+]], i32 [[TMP6]], i32 33, i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]], i64 1, i64 [[CONV6]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt i64 [[TMP7]], [[TMP8]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP7:%.*]] = icmp ugt 
i64 [[TMP7]], [[TMP8]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP7]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG15]] // CHECK5: cond.true: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG15]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG15]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: [[COND:%.*]] = phi i64 [ [[TMP9]], [[COND_TRUE]] ], [ [[TMP10]], [[COND_FALSE]] ], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[COND]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[TMP11]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP12:%.*]] = load 
i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD8:%.*]] = add i64 [[TMP13]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP9:%.*]] = icmp ult i64 [[TMP12]], [[ADD8]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]], !dbg [[DBG14]] -// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD10:%.*]] = add i64 [[TMP15]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[CMP11:%.*]] = icmp ult i64 [[TMP14]], [[ADD10]], !dbg [[DBG15]] +// CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP16]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG15:![0-9]+]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: 
[[MUL:%.*]] = mul i64 [[TMP16]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i64 1, [[MUL]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD12]], i64* [[I5]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG16:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1, !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG13]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i64 [[TMP17]], 1, !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD13]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG13]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG14]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG14]] -// CHECK5-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]], !dbg [[DBG14]] -// CHECK5-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8, !dbg 
[[DBG14]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG13]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD14:%.*]] = add i64 [[TMP18]], [[TMP19]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD14]], i64* [[DOTOMP_LB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: [[ADD15:%.*]] = add i64 [[TMP20]], [[TMP21]], !dbg [[DBG15]] +// CHECK5-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG15]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG14]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG13]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !dbg [[DBG13]] -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]]), !dbg [[DBG13]] -// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG13]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG14]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4, !dbg [[DBG14]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB3:[0-9]+]], i32 [[TMP23]]), !dbg [[DBG14]] +// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG14]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: ret void, !dbg [[DBG15]] +// CHECK5-NEXT: ret void, !dbg [[DBG16]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z23without_schedule_clausePfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG20:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], 
float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG21:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -4834,12 +4834,12 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG21:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB9:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..1 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG22:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG23:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..1 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG23:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 
dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG24:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4860,80 +4860,80 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG24:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG25:![0-9]+]] -// CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG24]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG24]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG24]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG25]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG25:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** 
[[B_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG26:![0-9]+]] +// CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG25]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB6:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG26]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG26]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG25]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG26]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG25]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG26]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* 
[[DOTOMP_IV]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG25]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG26]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG25]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]], !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG26:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG26]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]], !dbg 
[[DBG26]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG26]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG26]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG26]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG26]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG26]] -// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG26]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG27:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG26]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 33, [[MUL]], !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG26]] +// 
CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG27:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG27]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG27]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG27]] +// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG27]] +// CHECK5-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG28:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG25]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG25]] -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG25]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG24]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG26]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG26]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG25]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG24]] +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG25]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB8:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG24]] -// CHECK5-NEXT: ret void, !dbg [[DBG27]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB8:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG25]] +// CHECK5-NEXT: ret void, !dbg [[DBG28]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z18static_not_chunkedPfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG29:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG30:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -4943,12 +4943,12 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 
8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG30:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB14:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..2 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG31:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG32:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..2 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG32:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG33:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -4969,80 +4969,80 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** 
[[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG33:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG33]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG33]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG33]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG34:![0-9]+]] -// CHECK5-NEXT: store i32 4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG33]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG33]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG33]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG34]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG34:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG35:![0-9]+]] +// CHECK5-NEXT: store i32 
4571423, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG34]] +// CHECK5-NEXT: call void @__kmpc_for_static_init_4(%struct.ident_t* @[[GLOB11:[0-9]+]], i32 [[TMP5]], i32 34, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 1), !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp sgt i32 [[TMP6]], 4571423, !dbg [[DBG35]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG35]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG34]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG35]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG34]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG35]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG34]] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG33]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 4571423, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG34]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG33]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]], !dbg [[DBG35]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG34]] -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]], !dbg [[DBG34]] -// CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG35:![0-9]+]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG35]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 
[[TMP16]] to i64, !dbg [[DBG35]] -// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG35]] -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG35]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG35]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG35]] -// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG35]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG36:![0-9]+]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 7, !dbg [[DBG35]] +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 32000000, [[MUL]], !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[TMP12:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG36:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
float, float* [[TMP12]], i64 [[IDXPROM]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP15:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM2:%.*]] = sext i32 [[TMP16]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX3:%.*]] = getelementptr inbounds float, float* [[TMP15]], i64 [[IDXPROM2]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX3]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL4:%.*]] = fmul float [[TMP14]], [[TMP17]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM5:%.*]] = sext i32 [[TMP19]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP18]], i64 [[IDXPROM5]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX6]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[MUL7:%.*]] = fmul float [[MUL4]], [[TMP20]], !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG36]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = sext i32 [[TMP22]] to i64, !dbg [[DBG36]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP21]], i64 [[IDXPROM8]], !dbg [[DBG36]] +// CHECK5-NEXT: store float [[MUL7]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG36]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG37:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG33]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG34]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !dbg [[DBG34]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG34]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG34]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG33]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], 1, !dbg [[DBG35]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG35]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG34]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG33]] +// CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]], !dbg [[DBG34]] // CHECK5: omp.loop.exit: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG33]] -// CHECK5-NEXT: ret void, !dbg [[DBG36]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB13:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG34]] +// CHECK5-NEXT: ret void, !dbg [[DBG37]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z14static_chunkedPfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG38:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG39:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -5052,12 +5052,12 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG39:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB19:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..3 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG40:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG41:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..3 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG41:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG42:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5078,97 +5078,97 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], 
align 8, !dbg [[DBG42:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG42]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG42]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG42]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG43:![0-9]+]] -// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG42]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG42]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG42]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG43:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44:![0-9]+]] +// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG43]] +// CHECK5-NEXT: call 
void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB16:[0-9]+]], i32 [[TMP5]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG43]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288, !dbg [[DBG43]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP6]], 16908288, !dbg [[DBG44]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG44]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG43]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG44]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG43]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG44]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG43]] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]], !dbg [[DBG43]] -// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label 
[[OMP_DISPATCH_END:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ], !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP1:%.*]] = icmp ule i32 [[TMP9]], [[TMP10]], !dbg [[DBG44]] +// CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]], !dbg [[DBG43]] -// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP11]], [[TMP12]], !dbg [[DBG44]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127, !dbg [[DBG43]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG43]] -// CHECK5-NEXT: store i32 
[[ADD]], i32* [[I]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG44:![0-9]+]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG44]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG44]] -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG44]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG44]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG44]] -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG44]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG44]] -// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], 
align 4, !dbg [[DBG44]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG45:![0-9]+]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP13]], 127, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG45:![0-9]+]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP15]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[IDXPROM]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM3:%.*]] = zext i32 [[TMP18]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[IDXPROM3]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[TMP16]], [[TMP19]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i32 [[TMP21]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[IDXPROM6]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[MUL5]], [[TMP22]], !dbg [[DBG45]] +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP0]], align 8, 
!dbg [[DBG45]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i32 [[TMP24]] to i64, !dbg [[DBG45]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM9]], !dbg [[DBG45]] +// CHECK5-NEXT: store float [[MUL8]], float* [[ARRAYIDX10]], align 4, !dbg [[DBG45]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG46:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1, !dbg [[DBG43]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG42]], !llvm.loop [[LOOP46:![0-9]+]] +// CHECK5-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP25]], 1, !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG42]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG43]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]], !dbg [[DBG43]] -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]], !dbg [[DBG43]] -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG43]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG42]], !llvm.loop [[LOOP47:![0-9]+]] +// CHECK5-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP26]], [[TMP27]], !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP28]], [[TMP29]], !dbg [[DBG44]] +// CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG44]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG43]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG42]] -// CHECK5-NEXT: ret void, !dbg [[DBG45]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB18:[0-9]+]], i32 [[TMP5]]), !dbg [[DBG43]] +// CHECK5-NEXT: ret void, !dbg [[DBG46]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z8dynamic1PfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG48:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG49:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -5178,12 +5178,12 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: 
store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG49:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB21:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..4 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG50:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG51:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..4 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG51:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG52:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5204,73 +5204,73 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // 
CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG52:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG52]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG52]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG52]] -// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG53:![0-9]+]] -// CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG53]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG52]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG52]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG52]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG53:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG53]] +// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG54:![0-9]+]] +// CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG54]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], 
align 8, !dbg [[DBG53]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG53]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32 1073741859, i64 0, i64 16908287, i64 1, i64 1), !dbg [[DBG53]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG52]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG52]] -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB21]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG53]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG53]] +// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG53]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG53]] -// 
CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG53]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG54]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG53]] -// CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG53]] -// CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG55:![0-9]+]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG55]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg [[DBG55]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], 
[[TMP16]], !dbg [[DBG55]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG55]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG55]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG55]] -// CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG55]], !llvm.access.group !54 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG56:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG54]] +// CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG56:![0-9]+]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: 
[[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG56]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG56]] +// CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG56]], !llvm.access.group !55 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG57:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG53]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG53]] -// CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG53]], !llvm.access.group !54 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG52]], !llvm.loop 
[[LOOP57:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG54]] +// CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG54]], !llvm.access.group !55 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG53]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG52]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG53]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG52]], !llvm.loop [[LOOP59:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG53]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: ret void, !dbg [[DBG56]] +// CHECK5-NEXT: ret void, !dbg [[DBG57]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z7guided7PfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG60:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG61:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -5280,12 +5280,12 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG61:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB23:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..5 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG62:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG63:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..5 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG63:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG64:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5306,73 +5306,73 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], 
align 8, !dbg [[DBG64:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG64]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG64]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG64]] -// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG65:![0-9]+]] -// CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG65]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG64]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG64]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG64]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG64]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG65:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG66:![0-9]+]] +// CHECK5-NEXT: store i64 16908287, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG66]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG65]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG65]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8u(%struct.ident_t* @[[GLOB23]], i32 
[[TMP5]], i32 1073741860, i64 0, i64 16908287, i64 1, i64 7), !dbg [[DBG65]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG64]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG64]] -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG64]] +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_8u(%struct.ident_t* @[[GLOB23]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG65]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG65]] +// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG65]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG64]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG65]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG65]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG64]] +// 
CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1, !dbg [[DBG66]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]], !dbg [[DBG66]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG65]] -// CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG65]] -// CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG67:![0-9]+]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG67]] -// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg [[DBG67]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG67]] -// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], 
align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG67]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG67]] -// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG67]] -// CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG67]], !llvm.access.group !66 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG68:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127, !dbg [[DBG66]] +// CHECK5-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]], !dbg [[DBG66]] +// CHECK5-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG68:![0-9]+]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]], !dbg 
[[DBG68]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]], !dbg [[DBG68]] +// CHECK5-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]], !dbg [[DBG68]] +// CHECK5-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !dbg [[DBG68]], !llvm.access.group !67 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG69:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG64]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG65]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg [[DBG65]] -// CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG65]], !llvm.access.group !66 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG64]], !llvm.loop [[LOOP69:![0-9]+]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1, !dbg 
[[DBG66]] +// CHECK5-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG66]], !llvm.access.group !67 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG65]], !llvm.loop [[LOOP70:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG64]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG65]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG64]], !llvm.loop [[LOOP71:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG65]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: ret void, !dbg [[DBG68]] +// CHECK5-NEXT: ret void, !dbg [[DBG69]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z9test_autoPfS_S_S_ -// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG72:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG73:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -5384,14 +5384,14 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG73:![0-9]+]] -// CHECK5-NEXT: store i32 0, i32* [[Y]], align 4, !dbg [[DBG74:![0-9]+]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG75:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG74:![0-9]+]] +// CHECK5-NEXT: store i32 0, i32* [[Y]], align 4, !dbg [[DBG75:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB25:[0-9]+]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32* [[Y]], float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG76:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG77:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..6 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[Y:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG77:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], i32* nonnull align 4 dereferenceable(4) [[Y:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG78:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5420,116 +5420,116 @@ void 
range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8, !dbg [[DBG78:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG79:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8, !dbg [[DBG79]] -// CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32, !dbg [[DBG79]] -// CHECK5-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]], !dbg [[DBG79]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[CONV4:%.*]] = zext i32 [[DIV]] to i64, !dbg [[DBG79]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11, !dbg [[DBG80:![0-9]+]] -// CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1, !dbg [[DBG80]] -// CHECK5-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG79]] -// CHECK5-NEXT: store i8 [[TMP7]], i8* [[I]], align 1, !dbg [[DBG79]] -// CHECK5-NEXT: store i32 11, i32* [[X]], align 4, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32, !dbg [[DBG79]] 
-// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[CONV6]], 57, !dbg [[DBG79]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: [[TMP0:%.*]] = load i32*, i32** [[Y_ADDR]], align 8, !dbg [[DBG79:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP4:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP0]], align 4, !dbg [[DBG80:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[TMP5]] to i8, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[CONV]], i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV3:%.*]] = sext i8 [[TMP6]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[SUB:%.*]] = sub i32 57, [[CONV3]], !dbg [[DBG80]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 [[SUB]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[DIV:%.*]] = udiv i32 [[ADD]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV4:%.*]] = zext i32 [[DIV]] to i64, !dbg [[DBG80]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[CONV4]], 11, !dbg [[DBG81:![0-9]+]] +// CHECK5-NEXT: [[SUB5:%.*]] = sub nsw i64 [[MUL]], 1, !dbg [[DBG81]] +// CHECK5-NEXT: store i64 [[SUB5]], i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[TMP7]], i8* [[I]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i32 11, i32* [[X]], align 4, !dbg [[DBG81]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV6:%.*]] = sext i8 [[TMP8]] to i32, !dbg [[DBG80]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp 
sle i32 [[CONV6]], 57, !dbg [[DBG80]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.precond.then: -// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG80]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG78]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1), !dbg [[DBG78]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: store i64 0, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 1, i64* [[DOTOMP_STRIDE]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR_2]], align 8, !dbg [[DBG81]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP12]], i32 1073741862, i64 0, i64 [[TMP10]], i64 1, i64 1), !dbg [[DBG79]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg 
[[DBG79]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG78]] -// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG78]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG78]] -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[TMP13]], align 4, !dbg [[DBG79]] +// CHECK5-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_dispatch_next_8(%struct.ident_t* @[[GLOB25]], i32 [[TMP14]], i32* [[DOTOMP_IS_LAST]], i64* [[DOTOMP_LB]], i64* [[DOTOMP_UB]], i64* [[DOTOMP_STRIDE]]), !dbg [[DBG79]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP15]], 0, !dbg [[DBG79]] +// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_LB]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: 
[[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]], !dbg [[DBG79]] -// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]], !dbg [[DBG80]] +// CHECK5-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64, !dbg [[DBG79]] -// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11, !dbg [[DBG79]] -// CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1, !dbg [[DBG79]] -// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]], !dbg [[DBG79]] -// CHECK5-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8, !dbg [[DBG79]] -// CHECK5-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11, !dbg [[DBG79]] -// CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11, !dbg [[DBG79]] -// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]], !dbg [[DBG79]] -// CHECK5-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1, !dbg [[DBG80]] -// CHECK5-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]], !dbg [[DBG80]] -// CHECK5-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32, 
!dbg [[DBG80]] -// CHECK5-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !dbg [[DBG80]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG82:![0-9]+]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64, !dbg [[DBG82]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]], !dbg [[DBG82]] -// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64, !dbg [[DBG82]] -// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]], !dbg [[DBG82]] -// CHECK5-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]], !dbg [[DBG82]] -// CHECK5-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG82]] -// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]], !dbg [[DBG82]] -// CHECK5-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]], !dbg [[DBG82]] -// CHECK5-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: 
[[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG82]] -// CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]], !dbg [[DBG82]] -// CHECK5-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !dbg [[DBG82]], !llvm.access.group !81 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG83:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64, !dbg [[DBG80]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]], !dbg [[DBG80]] +// CHECK5-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8, !dbg [[DBG80]] +// CHECK5-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11, !dbg [[DBG80]] +// CHECK5-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]], !dbg [[DBG80]] +// CHECK5-NEXT: [[MUL18:%.*]] = mul nsw i64 [[SUB17]], 1, !dbg [[DBG81]] +// CHECK5-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]], !dbg [[DBG81]] +// CHECK5-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32, !dbg [[DBG81]] +// CHECK5-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !dbg [[DBG81]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP23:%.*]] = load float*, 
float** [[TMP2]], align 8, !dbg [[DBG83:![0-9]+]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64, !dbg [[DBG83]] +// CHECK5-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]], !dbg [[DBG83]] +// CHECK5-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64, !dbg [[DBG83]] +// 
CHECK5-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]], !dbg [[DBG83]] +// CHECK5-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !dbg [[DBG83]], !llvm.access.group !82 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG84:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG79]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1, !dbg [[DBG79]] -// CHECK5-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG79]], !llvm.access.group !81 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG78]], !llvm.loop [[LOOP84:![0-9]+]] +// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1, !dbg [[DBG80]] +// CHECK5-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !dbg [[DBG80]], !llvm.access.group !82 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG79]], !llvm.loop [[LOOP85:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG78]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG79]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG78]], !llvm.loop [[LOOP86:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG79]], !llvm.loop [[LOOP87:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG78]] +// CHECK5-NEXT: br label [[OMP_PRECOND_END]], !dbg [[DBG79]] // CHECK5: omp.precond.end: -// CHECK5-NEXT: ret void, !dbg [[DBG83]] +// CHECK5-NEXT: ret void, !dbg [[DBG84]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z7runtimePfS_S_S_ -// CHECK5-SAME: 
(float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG87:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], float* [[B:%.*]], float* [[C:%.*]], float* [[D:%.*]]) #[[ATTR0]] !dbg [[DBG88:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[B_ADDR:%.*]] = alloca float*, align 8 @@ -5540,13 +5540,13 @@ void range_for_collapsed() { // CHECK5-NEXT: store float* [[B]], float** [[B_ADDR]], align 8 // CHECK5-NEXT: store float* [[C]], float** [[C_ADDR]], align 8 // CHECK5-NEXT: store float* [[D]], float** [[D_ADDR]], align 8 -// CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG88:![0-9]+]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB27:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG89:![0-9]+]] -// CHECK5-NEXT: ret void, !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: store i32 0, i32* [[X]], align 4, !dbg [[DBG89:![0-9]+]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB27:[0-9]+]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, float**, float**, float**)* @.omp_outlined..7 to void (i32*, i32*, ...)*), float** [[A_ADDR]], float** [[B_ADDR]], float** [[C_ADDR]], float** [[D_ADDR]]), !dbg [[DBG90:![0-9]+]] +// CHECK5-NEXT: ret void, !dbg [[DBG91:![0-9]+]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..7 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG91:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], float** nonnull align 8 dereferenceable(8) [[B:%.*]], float** nonnull align 8 dereferenceable(8) [[C:%.*]], float** nonnull align 8 dereferenceable(8) [[D:%.*]]) #[[ATTR1]] !dbg [[DBG92:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5569,93 +5569,93 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[B]], float*** [[B_ADDR]], align 8 // CHECK5-NEXT: store float** [[C]], float*** [[C_ADDR]], align 8 // CHECK5-NEXT: store float** [[D]], float*** [[D_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG92:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG93:![0-9]+]] -// CHECK5-NEXT: store i32 199, i32* 
[[DOTOMP_UB]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG92]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG92]] -// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG92]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG93:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load float**, float*** [[B_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP2:%.*]] = load float**, float*** [[C_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP3:%.*]] = load float**, float*** [[D_ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG94:![0-9]+]] +// CHECK5-NEXT: store i32 199, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG93]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[TMP4]], align 4, !dbg [[DBG93]] +// CHECK5-NEXT: call void @__kmpc_dispatch_init_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32 1073741861, i32 0, i32 199, i32 1, i32 1), !dbg [[DBG93]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]), !dbg [[DBG92]] -// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg 
[[DBG92]] -// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP6:%.*]] = call i32 @__kmpc_dispatch_next_4(%struct.ident_t* @[[GLOB27]], i32 [[TMP5]], i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]]), !dbg [[DBG93]] +// CHECK5-NEXT: [[TOBOOL:%.*]] = icmp ne i32 [[TMP6]], 0, !dbg [[DBG93]] +// CHECK5-NEXT: br i1 [[TOBOOL]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG93]] -// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]], !dbg [[DBG94]] +// CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg 
[[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20, !dbg [[DBG93]] -// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1, !dbg [[DBG93]] -// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]], !dbg [[DBG93]] -// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8, !dbg [[DBG93]] -// CHECK5-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20, !dbg [[DBG93]] -// CHECK5-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20, !dbg [[DBG93]] -// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]], !dbg [[DBG93]] -// CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1, !dbg [[DBG95:![0-9]+]] -// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]], !dbg [[DBG95]] -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !dbg [[DBG95]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG96:![0-9]+]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG96]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG96]] -// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG96]] -// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* 
[[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG96]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG96]] -// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG96]] -// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG96]] -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG96]] -// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG96]] -// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG96]] -// CHECK5-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !dbg [[DBG96]], !llvm.access.group !94 -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG97:![0-9]+]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1, !dbg [[DBG94]] +// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]], !dbg [[DBG94]] +// CHECK5-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8, !dbg [[DBG94]] +// CHECK5-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[MUL3:%.*]] = mul nsw i32 [[DIV2]], 20, !dbg [[DBG94]] +// CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]], !dbg [[DBG94]] +// CHECK5-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1, !dbg [[DBG96:![0-9]+]] +// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]], !dbg [[DBG96]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !dbg [[DBG96]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !dbg [[DBG97:![0-9]+]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: 
[[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]], !dbg [[DBG97]] +// CHECK5-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64, !dbg [[DBG97]] +// CHECK5-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]], !dbg [[DBG97]] +// CHECK5-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !dbg [[DBG97]], !llvm.access.group !95 +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG98:![0-9]+]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG93]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1, !dbg [[DBG93]] -// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG93]], !llvm.access.group !94 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG92]], !llvm.loop [[LOOP98:![0-9]+]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1, !dbg [[DBG94]] +// CHECK5-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG94]], !llvm.access.group !95 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP99:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label 
[[OMP_DISPATCH_INC:%.*]], !dbg [[DBG92]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG93]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG92]], !llvm.loop [[LOOP100:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG93]], !llvm.loop [[LOOP101:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: ret void, !dbg [[DBG97]] +// CHECK5-NEXT: ret void, !dbg [[DBG98]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z3foov -// CHECK5-SAME: () #[[ATTR3:[0-9]+]] !dbg [[DBG101:![0-9]+]] { +// CHECK5-SAME: () #[[ATTR3:[0-9]+]] !dbg [[DBG102:![0-9]+]] { // CHECK5-NEXT: entry: -// CHECK5-NEXT: call void @_Z8mayThrowv(), !dbg [[DBG102:![0-9]+]] -// CHECK5-NEXT: ret i32 0, !dbg [[DBG102]] +// CHECK5-NEXT: call void @_Z8mayThrowv(), !dbg [[DBG103:![0-9]+]] +// CHECK5-NEXT: ret i32 0, !dbg [[DBG103]] // // // CHECK5-LABEL: define {{[^@]+}}@_Z12parallel_forPfi -// CHECK5-SAME: (float* [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] !dbg [[DBG103:![0-9]+]] { +// CHECK5-SAME: (float* [[A:%.*]], i32 [[N:%.*]]) #[[ATTR0]] !dbg [[DBG104:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[A_ADDR:%.*]] = alloca float*, align 8 // CHECK5-NEXT: [[N_ADDR:%.*]] = alloca i32, align 4 @@ -5664,24 +5664,24 @@ void range_for_collapsed() { // CHECK5-NEXT: [[N_CASTED:%.*]] = alloca i64, align 8 // CHECK5-NEXT: store float* [[A]], float** [[A_ADDR]], align 8 // CHECK5-NEXT: store i32 [[N]], i32* [[N_ADDR]], align 4 -// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG104:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG104]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG104]] -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG104]] -// CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG104]] -// CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG104]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32, 
i32* [[N_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32*, !dbg [[DBG105]] -// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4, !dbg [[DBG105]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8, !dbg [[DBG105]] -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]), !dbg [[DBG105]] -// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG106:![0-9]+]] -// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]), !dbg [[DBG106]] -// CHECK5-NEXT: ret void, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP0:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG105:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = zext i32 [[TMP0]] to i64, !dbg [[DBG105]] +// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG105]] +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG105]] +// CHECK5-NEXT: [[VLA:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG105]] +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG105]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i32, i32* [[N_ADDR]], align 4, !dbg [[DBG106:![0-9]+]] +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_CASTED]] to i32*, !dbg [[DBG106]] +// CHECK5-NEXT: store i32 [[TMP3]], i32* [[CONV]], align 4, !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i64, i64* [[N_CASTED]], align 8, !dbg [[DBG106]] +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB32:[0-9]+]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float**, i64, i64)* @.omp_outlined..8 to void (i32*, i32*, ...)*), float** [[A_ADDR]], i64 [[TMP1]], i64 [[TMP4]]), !dbg [[DBG106]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG107:![0-9]+]] +// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP5]]), !dbg [[DBG107]] +// CHECK5-NEXT: ret void, !dbg [[DBG107]] // // // CHECK5-LABEL: define {{[^@]+}}@.omp_outlined..8 -// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], i64 [[VLA:%.*]], i64 [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG107:![0-9]+]] { +// CHECK5-SAME: (i32* noalias [[DOTGLOBAL_TID_:%.*]], i32* noalias [[DOTBOUND_TID_:%.*]], float** nonnull align 8 dereferenceable(8) [[A:%.*]], i64 [[VLA:%.*]], i64 [[N:%.*]]) #[[ATTR1]] personality i8* bitcast (i32 (...)* @__gxx_personality_v0 to i8*) !dbg [[DBG108:![0-9]+]] { // CHECK5-NEXT: entry: // CHECK5-NEXT: [[DOTGLOBAL_TID__ADDR:%.*]] = alloca i32*, align 8 // CHECK5-NEXT: [[DOTBOUND_TID__ADDR:%.*]] = alloca i32*, align 8 @@ -5702,105 +5702,105 @@ void range_for_collapsed() { // CHECK5-NEXT: store float** [[A]], float*** [[A_ADDR]], align 8 // CHECK5-NEXT: store i64 [[VLA]], i64* [[VLA_ADDR]], align 8 // CHECK5-NEXT: store i64 [[N]], i64* [[N_ADDR]], align 8 -// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG108:![0-9]+]] -// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*, !dbg [[DBG108]] -// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG109:![0-9]+]] -// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG109]] -// 
CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG108]] -// CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG108]] -// CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG108]] -// CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG108]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP0:%.*]] = load float**, float*** [[A_ADDR]], align 8, !dbg [[DBG109:![0-9]+]] +// CHECK5-NEXT: [[TMP1:%.*]] = load i64, i64* [[VLA_ADDR]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[CONV:%.*]] = bitcast i64* [[N_ADDR]] to i32*, !dbg [[DBG109]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110:![0-9]+]] +// CHECK5-NEXT: store i32 16908288, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 1, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 0, i32* [[DOTOMP_IS_LAST]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP2:%.*]] = call i8* @llvm.stacksave(), !dbg [[DBG109]] +// CHECK5-NEXT: store i8* [[TMP2]], i8** [[SAVED_STACK]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[VLA1:%.*]] = alloca float, i64 [[TMP1]], align 16, !dbg [[DBG109]] +// CHECK5-NEXT: store i64 [[TMP1]], i64* [[__VLA_EXPR0]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP3:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP4:%.*]] = load i32, i32* [[TMP3]], align 4, !dbg [[DBG109]] +// 
CHECK5-NEXT: call void @__kmpc_for_static_init_4u(%struct.ident_t* @[[GLOB29:[0-9]+]], i32 [[TMP4]], i32 33, i32* [[DOTOMP_IS_LAST]], i32* [[DOTOMP_LB]], i32* [[DOTOMP_UB]], i32* [[DOTOMP_STRIDE]], i32 1, i32 5), !dbg [[DBG109]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND:%.*]], !dbg [[DBG109]] // CHECK5: omp.dispatch.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288, !dbg [[DBG109]] -// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[CMP:%.*]] = icmp ugt i32 [[TMP5]], 16908288, !dbg [[DBG110]] +// CHECK5-NEXT: br i1 [[CMP]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]], !dbg [[DBG110]] // CHECK5: cond.true: -// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG109]] +// CHECK5-NEXT: br label [[COND_END:%.*]], !dbg [[DBG110]] // CHECK5: cond.false: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: br label [[COND_END]], !dbg [[DBG110]] // CHECK5: cond.end: -// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]], !dbg [[DBG109]] -// CHECK5-NEXT: br i1 [[CMP2]], label 
[[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: [[COND:%.*]] = phi i32 [ 16908288, [[COND_TRUE]] ], [ [[TMP6]], [[COND_FALSE]] ], !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[COND]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[CMP2:%.*]] = icmp ule i32 [[TMP8]], [[TMP9]], !dbg [[DBG110]] +// CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_DISPATCH_BODY:%.*]], label [[OMP_DISPATCH_CLEANUP:%.*]], !dbg [[DBG109]] // CHECK5: omp.dispatch.cleanup: -// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_END:%.*]], !dbg [[DBG109]] // CHECK5: omp.dispatch.body: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]], !dbg [[DBG109]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]], !dbg [[DBG109]] -// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[CMP3:%.*]] = icmp ule i32 [[TMP10]], [[TMP11]], !dbg [[DBG110]] +// CHECK5-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], !dbg [[DBG109]] // CHECK5: omp.inner.for.cond.cleanup: -// 
CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_END:%.*]], !dbg [[DBG109]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127, !dbg [[DBG109]] -// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP12]], 127, !dbg [[DBG110]] +// CHECK5-NEXT: [[ADD:%.*]] = add i32 131071, [[MUL]], !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !dbg [[DBG110]] // CHECK5-NEXT: [[CALL:%.*]] = invoke i32 @_Z3foov() -// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG110:![0-9]+]] +// CHECK5-NEXT: to label [[INVOKE_CONT:%.*]] unwind label [[TERMINATE_LPAD:%.*]], !dbg [[DBG111:![0-9]+]] // CHECK5: invoke.cont: -// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG110]] -// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]], !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG110]] -// CHECK5-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]], !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !dbg 
[[DBG110]] -// CHECK5-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG110]] -// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]], !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]], !dbg [[DBG110]] -// CHECK5-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG110]] -// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg [[DBG110]] +// CHECK5-NEXT: [[CONV4:%.*]] = sitofp i32 [[CALL]] to float, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[IDXPROM:%.*]] = zext i32 [[TMP13]] to i64, !dbg [[DBG111]] +// CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[VLA1]], i64 [[IDXPROM]], !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP14:%.*]] = load float, float* [[ARRAYIDX]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD5:%.*]] = fadd float [[CONV4]], [[TMP14]], !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[CONV]], align 8, !dbg [[DBG111]] +// CHECK5-NEXT: [[CONV6:%.*]] = sitofp i32 [[TMP15]] to float, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD7:%.*]] = fadd float [[ADD5]], [[CONV6]], !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP0]], align 8, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[I]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[IDXPROM8:%.*]] = zext i32 [[TMP17]] to i64, !dbg [[DBG111]] +// CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM8]], !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: [[ADD10:%.*]] = fadd float [[TMP18]], [[ADD7]], !dbg [[DBG111]] +// CHECK5-NEXT: store float [[ADD10]], float* [[ARRAYIDX9]], align 4, !dbg [[DBG111]] +// CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]], !dbg 
[[DBG111]] // CHECK5: omp.body.continue: -// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]], !dbg [[DBG109]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1, !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG108]], !llvm.loop [[LOOP111:![0-9]+]] +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[ADD11:%.*]] = add i32 [[TMP19]], 1, !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_IV]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP112:![0-9]+]] // CHECK5: omp.inner.for.end: -// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG108]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]], !dbg [[DBG109]] // CHECK5: omp.dispatch.inc: -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]], !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]], !dbg [[DBG109]] -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG109]] -// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG108]], !llvm.loop [[LOOP112:![0-9]+]] +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP21:%.*]] = load i32, 
i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP20]], [[TMP21]], !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_LB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: [[ADD13:%.*]] = add i32 [[TMP22]], [[TMP23]], !dbg [[DBG110]] +// CHECK5-NEXT: store i32 [[ADD13]], i32* [[DOTOMP_UB]], align 4, !dbg [[DBG110]] +// CHECK5-NEXT: br label [[OMP_DISPATCH_COND]], !dbg [[DBG109]], !llvm.loop [[LOOP113:![0-9]+]] // CHECK5: omp.dispatch.end: -// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG108]] -// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG108]] -// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]), !dbg [[DBG108]] -// CHECK5-NEXT: ret void, !dbg [[DBG110]] +// CHECK5-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB31:[0-9]+]], i32 [[TMP4]]), !dbg [[DBG109]] +// CHECK5-NEXT: [[TMP24:%.*]] = load i8*, i8** [[SAVED_STACK]], align 8, !dbg [[DBG109]] +// CHECK5-NEXT: call void @llvm.stackrestore(i8* [[TMP24]]), !dbg [[DBG109]] +// CHECK5-NEXT: ret void, !dbg [[DBG111]] // CHECK5: terminate.lpad: // CHECK5-NEXT: [[TMP25:%.*]] = landingpad { i8*, i32 } -// CHECK5-NEXT: catch i8* null, !dbg [[DBG110]] -// CHECK5-NEXT: [[TMP26:%.*]] = extractvalue { i8*, i32 } [[TMP25]], 0, !dbg [[DBG110]] -// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP26]]) #[[ATTR7:[0-9]+]], !dbg [[DBG110]] -// CHECK5-NEXT: unreachable, !dbg [[DBG110]] +// CHECK5-NEXT: catch i8* null, !dbg [[DBG111]] +// CHECK5-NEXT: [[TMP26:%.*]] = extractvalue { i8*, i32 } [[TMP25]], 0, !dbg [[DBG111]] +// CHECK5-NEXT: call void @__clang_call_terminate(i8* [[TMP26]]) #[[ATTR7:[0-9]+]], !dbg [[DBG111]] +// CHECK5-NEXT: unreachable, !dbg [[DBG111]] // // 
// CHECK5-LABEL: define {{[^@]+}}@__clang_call_terminate @@ -6339,42 +6339,42 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = add i64 131071, [[MUL]] -// CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: 
[[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* [[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !5 // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK6-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !4 +// CHECK6-NEXT: store 
float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !5 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 // CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !5 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6441,42 +6441,42 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[TMP7]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ADD:%.*]] = add i64 [[TMP9]], 1 // CHECK6-NEXT: [[CMP:%.*]] = icmp ult i64 [[TMP8]], [[ADD]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP10]], 127 // CHECK6-NEXT: [[ADD1:%.*]] = 
add i64 131071, [[MUL]] -// CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: store i64 [[ADD1]], i64* [[I]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP11:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP11]], i64 [[TMP12]] -// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP13:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP14:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ARRAYIDX2:%.*]] = getelementptr inbounds float, float* [[TMP14]], i64 [[TMP15]] -// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP16:%.*]] = load float, float* [[ARRAYIDX2]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL3:%.*]] = fmul float [[TMP13]], [[TMP16]] -// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP17:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ARRAYIDX4:%.*]] = getelementptr inbounds float, float* 
[[TMP17]], i64 [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP19:%.*]] = load float, float* [[ARRAYIDX4]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[MUL5:%.*]] = fmul float [[MUL3]], [[TMP19]] -// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP20:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[I]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds float, float* [[TMP20]], i64 [[TMP21]] -// CHECK6-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !7 +// CHECK6-NEXT: store float [[MUL5]], float* [[ARRAYIDX6]], align 4, !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 // CHECK6-NEXT: [[ADD7:%.*]] = add i64 [[TMP22]], 1 -// CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !7 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6580,58 +6580,58 @@ void range_for_collapsed() { // CHECK6-NEXT: store i64 [[TMP16]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: 
[[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP18:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[CMP9:%.*]] = icmp sle i64 [[TMP17]], [[TMP18]] // CHECK6-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP19:%.*]] = load i8, i8* [[DOTCAPTURE_EXPR_]], align 1, !llvm.access.group !11 // CHECK6-NEXT: [[CONV10:%.*]] = sext i8 [[TMP19]] to i64 -// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP20:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[DIV11:%.*]] = sdiv i64 [[TMP20]], 11 // CHECK6-NEXT: [[MUL12:%.*]] = mul nsw i64 [[DIV11]], 1 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i64 [[CONV10]], [[MUL12]] // CHECK6-NEXT: [[CONV14:%.*]] = trunc i64 [[ADD13]] to i8 -// CHECK6-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: store i8 [[CONV14]], i8* [[I7]], align 1, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[DIV15:%.*]] = sdiv i64 [[TMP22]], 11 // CHECK6-NEXT: [[MUL16:%.*]] = mul nsw i64 [[DIV15]], 11 // CHECK6-NEXT: [[SUB17:%.*]] = sub nsw i64 [[TMP21]], [[MUL16]] // CHECK6-NEXT: [[MUL18:%.*]] = mul 
nsw i64 [[SUB17]], 1 // CHECK6-NEXT: [[SUB19:%.*]] = sub nsw i64 11, [[MUL18]] // CHECK6-NEXT: [[CONV20:%.*]] = trunc i64 [[SUB19]] to i32 -// CHECK6-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK6-NEXT: store i32 [[CONV20]], i32* [[X8]], align 4, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP23:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP24:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i8 [[TMP24]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP23]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP25:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP26:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP27:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK6-NEXT: [[IDXPROM21:%.*]] = sext i8 [[TMP27]] to i64 // CHECK6-NEXT: [[ARRAYIDX22:%.*]] = getelementptr inbounds float, float* [[TMP26]], i64 [[IDXPROM21]] -// CHECK6-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP28:%.*]] = load float, float* [[ARRAYIDX22]], align 4, !llvm.access.group !11 // CHECK6-NEXT: [[MUL23:%.*]] = fmul float [[TMP25]], [[TMP28]] -// CHECK6-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 
+// CHECK6-NEXT: [[TMP29:%.*]] = load float*, float** [[TMP4]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP30:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK6-NEXT: [[IDXPROM24:%.*]] = sext i8 [[TMP30]] to i64 // CHECK6-NEXT: [[ARRAYIDX25:%.*]] = getelementptr inbounds float, float* [[TMP29]], i64 [[IDXPROM24]] -// CHECK6-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP31:%.*]] = load float, float* [[ARRAYIDX25]], align 4, !llvm.access.group !11 // CHECK6-NEXT: [[MUL26:%.*]] = fmul float [[MUL23]], [[TMP31]] -// CHECK6-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP32:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !11 +// CHECK6-NEXT: [[TMP33:%.*]] = load i8, i8* [[I7]], align 1, !llvm.access.group !11 // CHECK6-NEXT: [[IDXPROM27:%.*]] = sext i8 [[TMP33]] to i64 // CHECK6-NEXT: [[ARRAYIDX28:%.*]] = getelementptr inbounds float, float* [[TMP32]], i64 [[IDXPROM27]] -// CHECK6-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: store float [[MUL26]], float* [[ARRAYIDX28]], align 4, !llvm.access.group !11 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 // CHECK6-NEXT: [[ADD29:%.*]] = add nsw i64 [[TMP34]], 1 -// CHECK6-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !10 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !11 
+// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6704,55 +6704,55 @@ void range_for_collapsed() { // CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[DIV:%.*]] = sdiv i32 [[TMP10]], 20 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[DIV]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 48, [[MUL]] // CHECK6-NEXT: [[CONV:%.*]] = trunc i32 [[ADD]] to i8 -// CHECK6-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: store i8 [[CONV]], i8* [[I]], align 1, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[DIV2:%.*]] = sdiv i32 [[TMP12]], 20 // CHECK6-NEXT: [[MUL3:%.*]] = mul nsw i32 
[[DIV2]], 20 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 [[TMP11]], [[MUL3]] // CHECK6-NEXT: [[MUL4:%.*]] = mul nsw i32 [[SUB]], 1 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 -10, [[MUL4]] -// CHECK6-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK6-NEXT: store i32 [[ADD5]], i32* [[X]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP13:%.*]] = load float*, float** [[TMP1]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK6-NEXT: [[IDXPROM:%.*]] = zext i8 [[TMP14]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds float, float* [[TMP13]], i64 [[IDXPROM]] -// CHECK6-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP15:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP16:%.*]] = load float*, float** [[TMP2]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP17:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK6-NEXT: [[IDXPROM6:%.*]] = zext i8 [[TMP17]] to i64 // CHECK6-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds float, float* [[TMP16]], i64 [[IDXPROM6]] -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX7]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[MUL8:%.*]] = fmul float [[TMP15]], [[TMP18]] -// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], 
align 1, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP3]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP20:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK6-NEXT: [[IDXPROM9:%.*]] = zext i8 [[TMP20]] to i64 // CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP19]], i64 [[IDXPROM9]] -// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[MUL11:%.*]] = fmul float [[MUL8]], [[TMP21]] -// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !13 -// CHECK6-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP22:%.*]] = load float*, float** [[TMP0]], align 8, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP23:%.*]] = load i8, i8* [[I]], align 1, !llvm.access.group !14 // CHECK6-NEXT: [[IDXPROM12:%.*]] = zext i8 [[TMP23]] to i64 // CHECK6-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds float, float* [[TMP22]], i64 [[IDXPROM12]] -// CHECK6-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: store float [[MUL11]], float* [[ARRAYIDX13]], align 4, !llvm.access.group !14 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK6-NEXT: [[ADD14:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD14]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !14 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -7766,4 +7766,3 @@ void range_for_collapsed() { // CHECK12: omp.precond.end: // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp index 46baa4a046bd2..f90e9027997ab 100644 --- a/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_reduction_task_codegen.cpp @@ -452,26 +452,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -505,8 +505,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -976,26 +976,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1029,8 +1029,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1082,4 +1082,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_for_simd_codegen.cpp b/clang/test/OpenMP/parallel_for_simd_codegen.cpp index 213d745d6e7e5..eca2f18242c0a 100644 --- a/clang/test/OpenMP/parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_for_simd_codegen.cpp @@ -792,10 +792,12 @@ for (int i = 0; i < 10; ++i); // // OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} -// OMP45-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP45-DAG: ![[VECT]] = distinct !{![[VECT]], ![[PA:.+]], ![[VM:.+]]} +// OMP45-DAG: ![[PA]] = !{!"llvm.loop.parallel_accesses", !{{.+}}} // OMP45-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} // OMP45-NOT: !{!"llvm.loop.vectorize.enable", i1 false} -// OMP50-DAG: ![[VECT]] = distinct !{![[VECT]], ![[VM:.+]]} +// OMP50-DAG: ![[VECT]] = distinct !{![[VECT]], ![[PA:.+]], ![[VM:.+]]} +// OMP50-DAG: ![[PA]] = !{!"llvm.loop.parallel_accesses", !{{.+}}} // OMP50-DAG: ![[VM]] = !{!"llvm.loop.vectorize.enable", i1 true} // OMP50-DAG: ![[NOVECT]] = distinct !{![[NOVECT]], ![[NOVM:.+]]} // OMP50-DAG: ![[NOVM]] = !{!"llvm.loop.vectorize.enable", i1 false} @@ -808,7 +810,7 @@ void parallel_simd(float *a) { #pragma omp parallel for simd // TERM_DEBUG-NOT: __kmpc_global_thread_num // TERM_DEBUG: invoke i32 {{.*}}bar{{.*}}() - // TERM_DEBUG: unwind label %[[TERM_LPAD:.+]], + // TERM_DEBUG: unwind label %[[TERM_LPAD:[a-zA-Z0-9\.]+]], // TERM_DEBUG-NOT: __kmpc_global_thread_num // TERM_DEBUG: [[TERM_LPAD]] // TERM_DEBUG: call void 
@__clang_call_terminate diff --git a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp index 850be632b50e0..3c75c4c74c710 100644 --- a/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_reduction_task_codegen.cpp @@ -407,26 +407,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -460,8 +460,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -886,26 +886,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -939,8 +939,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -992,4 +992,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp index c1c6300db54e6..83ddf138f01d6 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_codegen.cpp @@ -202,39 +202,39 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, 
!noalias !13 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], 
align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// 
CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: .omp_outlined..1.exit: // CHECK1-NEXT: ret i32 0 @@ -318,39 +318,39 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP20:%.*]] = load 
%struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// 
CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !27 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !28 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: .omp_outlined..3.exit: // CHECK1-NEXT: ret i32 0 @@ -498,31 +498,31 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// 
CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !39 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !39 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !39 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], 
i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !40 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !40 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 // CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// CHECK1-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK1-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP24]], align 8 // CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[TMP27:%.*]] = getelementptr inbounds 
[[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP28:%.*]] = load i8***, i8**** [[TMP27]], align 8 // CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[TMP28]], align 8 @@ -539,63 +539,63 @@ struct S { // CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP33]], i64 [[IDXPROM4_I]] // CHECK1-NEXT: [[TMP37:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK1-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP37]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK1-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP39]], [[TMP40]] // CHECK1-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK1-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK1-NEXT: [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK1-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK1-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 -// CHECK1-NEXT: store i32 [[TMP41]], i32* [[J_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// 
CHECK1-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 +// CHECK1-NEXT: store i32 [[TMP41]], i32* [[J_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP42]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK1: land.lhs.true.i: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 // CHECK1-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP43]], [[TMP44]] // CHECK1-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT]] // CHECK1: taskloop.if.then.i: -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !39 -// CHECK1-NEXT: store i64 [[TMP45]], i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !40 +// CHECK1-NEXT: store i64 [[TMP45]], i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK1-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP47:%.*]] = load i32*, i32** [[TMP46]], align 8 // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP49:%.*]] = load i8***, i8**** [[TMP48]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: 
[[TMP50:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK1-NEXT: [[TMP51:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !39 +// CHECK1-NEXT: [[TMP50:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP51:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !40 // CHECK1-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP50]], [[TMP51]] // CHECK1-NEXT: br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP52:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: [[TMP52:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP53]], [[TMP54]] // CHECK1-NEXT: [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK1-NEXT: [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK1-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP52]], [[CONV22_I]] // CHECK1-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK1-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP55]] to i64 -// CHECK1-NEXT: [[TMP56:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// 
CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: [[TMP56:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP58]], [[TMP59]] // CHECK1-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK1-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK1-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP57]], [[CONV33_I]] -// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK1-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK1-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP60]], [[TMP61]] // CHECK1-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK1-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -603,10 +603,10 @@ struct S { // CHECK1-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP56]], [[MUL41_I]] // CHECK1-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK1-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK1-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !39 -// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK1-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !40 +// CHECK1-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK1-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP62]], 1 -// 
CHECK1-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK1-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: omp.inner.for.end.i: // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__6_EXIT]] @@ -690,61 +690,61 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !51 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], 
%struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !51 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !52 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] 
to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK1-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !51 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !51 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !52 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 // CHECK1-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR2]] // CHECK1-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK1-NEXT: br i1 [[TMP27]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK1: .cancel.exit.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: .cancel.continue.i: -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* 
[[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 // CHECK1-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR2]] // CHECK1-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK1-NEXT: br i1 [[TMP30]], label [[DOTCANCEL_EXIT2_I:%.*]], label [[DOTCANCEL_CONTINUE3_I:%.*]] // CHECK1: .cancel.exit2.i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK1: .cancel.continue3.i: -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK1-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK1-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: omp.inner.for.end.i: -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK1: .omp_outlined..9.exit: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: ret i32 0 // // @@ -908,60 +908,60 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !63 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !63 +// CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK1-NEXT: store 
i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK1-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !63 -// CHECK1-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !63 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !63 +// CHECK1-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !64 +// CHECK1-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !64 // CHECK1-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__12_EXIT:%.*]] // CHECK1: taskloop.if.then.i: -// CHECK1-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !63 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 +// CHECK1-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// 
CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK1-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK1-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !63 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !64 // CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK1-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK1-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK1-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: omp.inner.for.end.i: // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__12_EXIT]] @@ -1128,39 +1128,39 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 
[[CONV1_I]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: .omp_outlined..1.exit: // CHECK2-NEXT: ret i32 0 @@ -1244,39 +1244,39 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], 
align 8, !noalias !27 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: 
store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !27 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !28 +// 
CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: .omp_outlined..3.exit: // CHECK2-NEXT: ret i32 0 @@ -1424,31 +1424,31 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !39 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !39 -// CHECK2-NEXT: store 
i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !39 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !40 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !40 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !40 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP20]], i32 0, 
i32 0 // CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 // CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// CHECK2-NEXT: store i32 [[TMP23]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK2-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP24]], align 8 // CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: store i32 [[TMP26]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK2-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP28:%.*]] = load i8***, i8**** [[TMP27]], align 8 // CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[TMP28]], align 8 @@ -1465,63 +1465,63 @@ struct S { // CHECK2-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP33]], i64 [[IDXPROM4_I]] // CHECK2-NEXT: [[TMP37:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK2-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP37]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK2-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP38]] to i64 -// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK2-NEXT: 
[[TMP40:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK2-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP39]], [[TMP40]] // CHECK2-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK2-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK2-NEXT: [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK2-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK2-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 -// CHECK2-NEXT: store i32 [[TMP41]], i32* [[J_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !39 +// CHECK2-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 +// CHECK2-NEXT: store i32 [[TMP41]], i32* [[J_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP42:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !40 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP42]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK2: land.lhs.true.i: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 // CHECK2-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP43]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT]] // CHECK2: taskloop.if.then.i: 
-// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !39 -// CHECK2-NEXT: store i64 [[TMP45]], i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !40 +// CHECK2-NEXT: store i64 [[TMP45]], i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK2-NEXT: [[TMP46:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP47:%.*]] = load i32*, i32** [[TMP46]], align 8 // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP49:%.*]] = load i8***, i8**** [[TMP48]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP50:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK2-NEXT: [[TMP51:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !39 +// CHECK2-NEXT: [[TMP50:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK2-NEXT: [[TMP51:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !40 // CHECK2-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP50]], [[TMP51]] // CHECK2-NEXT: br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP52:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: [[TMP52:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK2-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP53]], [[TMP54]] // CHECK2-NEXT: 
[[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK2-NEXT: [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK2-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP52]], [[CONV22_I]] // CHECK2-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK2-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK2-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP55]] to i64 -// CHECK2-NEXT: [[TMP56:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK2-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: [[TMP56:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK2-NEXT: [[TMP57:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP59:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !40 // CHECK2-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP58]], [[TMP59]] // CHECK2-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK2-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK2-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP57]], [[CONV33_I]] -// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !39 +// CHECK2-NEXT: [[TMP60:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias 
!40 // CHECK2-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP60]], [[TMP61]] // CHECK2-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK2-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -1529,10 +1529,10 @@ struct S { // CHECK2-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP56]], [[MUL41_I]] // CHECK2-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK2-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK2-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !39 -// CHECK2-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK2-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !40 +// CHECK2-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK2-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP62]], 1 -// CHECK2-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !39 +// CHECK2-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !40 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: omp.inner.for.end.i: // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__6_EXIT]] @@ -1616,61 +1616,61 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK2-NEXT: store i32* 
[[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !51 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !51 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i8* [[TMP9]], i8** 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !52 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !52 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK2-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !51 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !52 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !51 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 +// CHECK2-NEXT: 
[[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !52 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 // CHECK2-NEXT: [[TMP26:%.*]] = call i32 @__kmpc_cancel(%struct.ident_t* @[[GLOB1]], i32 [[TMP25]], i32 4) #[[ATTR2]] // CHECK2-NEXT: [[TMP27:%.*]] = icmp ne i32 [[TMP26]], 0 // CHECK2-NEXT: br i1 [[TMP27]], label [[DOTCANCEL_EXIT_I:%.*]], label [[DOTCANCEL_CONTINUE_I:%.*]] // CHECK2: .cancel.exit.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: .cancel.continue.i: -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 // CHECK2-NEXT: [[TMP29:%.*]] = call i32 @__kmpc_cancellationpoint(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i32 4) #[[ATTR2]] // CHECK2-NEXT: [[TMP30:%.*]] = icmp ne i32 [[TMP29]], 0 // CHECK2-NEXT: br i1 [[TMP30]], label [[DOTCANCEL_EXIT2_I:%.*]], label [[DOTCANCEL_CONTINUE3_I:%.*]] // CHECK2: .cancel.exit2.i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK2: .cancel.continue3.i: -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK2-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP31]], 1 -// CHECK2-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !52 // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND_I]] // CHECK2: omp.inner.for.end.i: -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK2: .omp_outlined..9.exit: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: ret i32 0 // // @@ -1834,60 +1834,60 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], 
align 8, !noalias !63 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !63 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], 
align 8, !noalias !64 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_6:%.*]], %struct.anon.6* [[TMP20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK2-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !63 +// CHECK2-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK2-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK2-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !63 -// CHECK2-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !63 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !63 +// CHECK2-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !64 +// CHECK2-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !64 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !64 // CHECK2-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__12_EXIT:%.*]] // CHECK2: taskloop.if.then.i: -// 
CHECK2-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !63 -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 +// CHECK2-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !64 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_6]], %struct.anon.6* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK2-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 // CHECK2-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK2-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !63 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !64 // CHECK2-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], 
align 4, !noalias !64 // CHECK2-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK2-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK2-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: omp.inner.for.end.i: // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__12_EXIT]] diff --git a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp index b3557e9639e22..5b971bd16b471 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_lastprivate_codegen.cpp @@ -420,25 +420,25 @@ void loop() { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 @@ -455,24 +455,24 @@ void loop() { // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP41:%.*]] = load 
i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label 
[[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP43]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4 @@ -481,12 +481,12 @@ void loop() { // CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[TMP40]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]] // CHECK1-NEXT: store i32 33, i32* [[TMP44]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: omp.inner.for.end.i: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: .omp.lastprivate.then.i: @@ -835,25 +835,25 @@ void loop() { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 
// CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], 
align 8, !noalias !27 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: 
[[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 @@ -868,23 +868,23 @@ void loop() { // CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !27 -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* 
[[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP44]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !28 // CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 // CHECK1-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4 @@ -892,12 +892,12 @@ void loop() { // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* // CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]] -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, 
!noalias !27 +// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: omp.inner.for.end.i: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 // CHECK1-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 // CHECK1-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK1: .omp.lastprivate.then.i: @@ -1254,25 +1254,25 @@ void loop() { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 @@ -1289,24 +1289,24 @@ void loop() { // CHECK2-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 // CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 -// CHECK2-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP41:%.*]] = load 
i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label 
[[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP43]], i64 0, i64 0 // CHECK2-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4 @@ -1315,12 +1315,12 @@ void loop() { // CHECK2-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[TMP40]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]] // CHECK2-NEXT: store i32 33, i32* [[TMP44]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: omp.inner.for.end.i: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK2-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: .omp.lastprivate.then.i: @@ -1669,25 +1669,25 @@ void loop() { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, 
i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !27 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias 
!28 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 @@ -1702,23 +1702,23 @@ void loop() { // CHECK2-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 // CHECK2-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !27 -// CHECK2-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !27 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !28 +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !28 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 -// CHECK2-NEXT: store i32 
[[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP43]] to i64 -// CHECK2-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !27 +// CHECK2-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !28 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 -// CHECK2-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 +// CHECK2-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !28 // CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 // CHECK2-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4 @@ -1726,12 +1726,12 @@ void loop() { // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* // CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]] -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK2-NEXT: store i32 [[ADD6_I]], i32* 
[[DOTOMP_IV_I]], align 4, !noalias !27 +// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !28 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: omp.inner.for.end.i: -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !27 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !28 // CHECK2-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 // CHECK2-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK2: .omp.lastprivate.then.i: @@ -1983,59 +1983,59 @@ void loop() { // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** 
[[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP32]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP34]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK3: omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14 // CHECK3-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8 // CHECK3-NEXT: store i32 11, i32* [[TMP31]], align 4 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !13 +// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* 
[[REF_TMP_I]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !13 +// CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !14 // CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 -// CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK3: omp.inner.for.end.i: -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK3-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 // CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK3: .omp.lastprivate.then.i: @@ -2212,77 +2212,77 @@ void loop() { // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* 
[[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i32* [[TMP5]], i32** 
[[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK4-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, 
!noalias !13 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP32]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP34]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14 // CHECK4-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8 // CHECK4-NEXT: store i32 11, i32* [[TMP31]], align 4 // CHECK4-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 
}>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0 -// CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !13 +// CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14 // CHECK4-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1 -// CHECK4-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !13 +// CHECK4-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14 // CHECK4-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2 -// CHECK4-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14 // CHECK4-NEXT: [[BLOCK_INVOKE_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3 -// CHECK4-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !13 +// CHECK4-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14 // CHECK4-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4 -// CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** 
[[BLOCK_DESCRIPTOR_I]], align 8, !noalias !13 +// CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5 // CHECK4-NEXT: [[TMP36:%.*]] = load volatile double, double* [[TMP30]], align 8 -// CHECK4-NEXT: store volatile double [[TMP36]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !13 +// CHECK4-NEXT: store volatile double [[TMP36]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14 // CHECK4-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6 // CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP31]], align 4 -// CHECK4-NEXT: store i32 [[TMP37]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !13 +// CHECK4-NEXT: store i32 [[TMP37]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP38:%.*]] = bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()* // CHECK4-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP38]] to %struct.__block_literal_generic* // CHECK4-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3 // CHECK4-NEXT: [[TMP40:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8* -// CHECK4-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP39]], align 8, !noalias !13 +// CHECK4-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP39]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to void (i8*)* // CHECK4-NEXT: call 
void [[TMP42]](i8* [[TMP40]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP43]], 1 -// CHECK4-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK4: omp.inner.for.end.i: -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK4-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 // CHECK4-NEXT: br i1 [[TMP45]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK4: .omp.lastprivate.then.i: @@ -2446,54 +2446,54 @@ void loop() { // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store void (i8*, ...)* bitcast (void 
(%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* // CHECK5-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTLASTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load float**, float*** [[TMP28]], align 8 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 // CHECK5-NEXT: 
[[TMP31:%.*]] = load %struct.St**, %struct.St*** [[TMP30]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP34]] to i32 -// CHECK5-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK5-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK5: omp.inner.for.cond.i: -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK5-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK5-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP36]] // CHECK5-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK5: omp.inner.for.body.i: -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK5-NEXT: store i32 [[TMP37]], i32* [[I_I]], align 4, !noalias !13 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK5-NEXT: store i32 [[TMP37]], i32* 
[[I_I]], align 4, !noalias !14 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK5-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 -// CHECK5-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK5-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK5: omp.inner.for.end.i: -// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 // CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK5: .omp.lastprivate.then.i: @@ -2634,54 +2634,54 @@ void loop() { // CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK6-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK6-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK6-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK6-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2:[0-9]+]] // CHECK6-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK6-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK6-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK6-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK6-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 
4, !noalias !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK6: omp.inner.for.cond.i: -// CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK6-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP31]] // CHECK6-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK6: omp.inner.for.body.i: -// CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK6-NEXT: store i32 [[TMP32]], i32* [[I_I]], align 4, !noalias !13 -// CHECK6-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK6-NEXT: store i32 [[TMP32]], i32* [[I_I]], align 4, !noalias !14 +// CHECK6-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK6-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP33]], 1 -// CHECK6-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK6: omp.inner.for.end.i: -// CHECK6-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK6-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK6-NEXT: [[TMP35:%.*]] = icmp ne i32 [[TMP34]], 0 // CHECK6-NEXT: br i1 [[TMP35]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK6: .omp.lastprivate.then.i: -// CHECK6-NEXT: store i32 10, i32* [[I_I]], align 4, !noalias !13 -// CHECK6-NEXT: 
[[TMP36:%.*]] = load i32, i32* [[I_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 10, i32* [[I_I]], align 4, !noalias !14 +// CHECK6-NEXT: [[TMP36:%.*]] = load i32, i32* [[I_I]], align 4, !noalias !14 // CHECK6-NEXT: store i32 [[TMP36]], i32* [[TMP27]], align 4 // CHECK6-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK6: .omp_outlined..1.exit: @@ -2705,4 +2705,3 @@ void loop() { // CHECK6-NEXT: [[TMP7:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK6-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp index 3fbb0a3478145..f738761e7ff7c 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_codegen.cpp @@ -204,40 +204,40 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** 
[[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 
[[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: 
[[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK1: .omp_outlined..1.exit: // CHECK1-NEXT: ret i32 0 // @@ -320,40 +320,40 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !30 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !30 -// 
CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// 
CHECK1-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK1-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !30, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK1-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop 
[[LOOP32:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: .omp_outlined..3.exit: // CHECK1-NEXT: ret i32 0 // @@ -523,38 +523,38 @@ struct S { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK1-NEXT: store i32 [[TMP31]], i32* 
[[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK1-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK1-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK1-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK1-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 @@ -571,63 +571,63 @@ struct S { // CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[IDXPROM4_I]] // CHECK1-NEXT: [[TMP45:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK1-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK1-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK1-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP47]], [[TMP48]] // CHECK1-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK1-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK1-NEXT: 
[[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK1-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK1-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !46 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK1-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !46 -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK1-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !47 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK1-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !47 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP50]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]] // CHECK1: land.lhs.true.i: -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 // CHECK1-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP51]], [[TMP52]] // CHECK1-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]] // CHECK1: taskloop.if.then.i: -// CHECK1-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK1-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK1-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK1-NEXT: store i64 
[[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK1-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 // CHECK1-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK1-NEXT: [[TMP57:%.*]] = load i8***, i8**** [[TMP56]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP58]], [[TMP59]] // CHECK1-NEXT: br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP61]], [[TMP62]] // CHECK1-NEXT: [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK1-NEXT: 
[[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK1-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP60]], [[CONV22_I]] // CHECK1-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK1-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP63]] to i64 -// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP66]], [[TMP67]] // CHECK1-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK1-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK1-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP65]], [[CONV33_I]] -// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: 
[[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP68]], [[TMP69]] // CHECK1-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK1-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -635,15 +635,15 @@ struct S { // CHECK1-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP64]], [[MUL41_I]] // CHECK1-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK1-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK1-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK1-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK1-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP70]], 1 -// CHECK1-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end.i: // CHECK1-NEXT: br label [[TASKLOOP_IF_END_I]] // CHECK1: taskloop.if.end.i: -// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 +// CHECK1-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK1-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 // CHECK1-NEXT: br i1 
[[TMP72]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK1: .omp.lastprivate.then.i: @@ -830,61 +830,61 @@ struct S { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !61 -// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias 
!61 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !62 +// CHECK1-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 // CHECK1-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !61 +// CHECK1-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !62 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr 
inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK1-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 +// CHECK1-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 // CHECK1-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK1-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !61 -// CHECK1-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !61 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !61 +// CHECK1-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !62 +// CHECK1-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !62 // CHECK1-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: taskloop.if.then.i: -// CHECK1-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !61 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !61 +// CHECK1-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], 
i32* [[DOTOMP_IV_I]], align 4, !noalias !61 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 +// CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 // CHECK1-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !61, !llvm.access.group !62 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !62, !llvm.access.group !63 // CHECK1-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK1-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 -// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !61, !llvm.access.group !62 -// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !62 -// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 +// CHECK1-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 +// CHECK1-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !62, !llvm.access.group !63 +// CHECK1-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !63 +// CHECK1-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 // CHECK1-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 
-// CHECK1-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP63:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK1: omp.inner.for.end.i: // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK1: .omp_outlined..9.exit: @@ -1049,40 +1049,40 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 
[[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK2: .omp_outlined..1.exit: // CHECK2-NEXT: 
ret i32 0 // @@ -1165,40 +1165,40 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !30 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !30 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 +// CHECK2-NEXT: 
call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: 
[[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK2-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !30, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK2-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: .omp_outlined..3.exit: // CHECK2-NEXT: ret i32 0 // @@ -1368,38 +1368,38 @@ struct S { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, 
...)* [[TMP23]] to void (i8*, i32**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK2-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK2-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK2-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK2-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK2-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK2-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 @@ -1416,63 +1416,63 @@ struct S { // CHECK2-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[IDXPROM4_I]] // CHECK2-NEXT: [[TMP45:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK2-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK2-NEXT: 
[[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK2-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK2-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP47]], [[TMP48]] // CHECK2-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK2-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK2-NEXT: [[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK2-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK2-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !46 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK2-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !46 -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK2-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !47 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK2-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !47 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP50]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label 
[[TASKLOOP_IF_END_I:%.*]] // CHECK2: land.lhs.true.i: -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 // CHECK2-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP51]], [[TMP52]] // CHECK2-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]] // CHECK2: taskloop.if.then.i: -// CHECK2-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK2-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK2-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK2-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK2-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 // CHECK2-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK2-NEXT: [[TMP57:%.*]] = load i8***, i8**** [[TMP56]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP58:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP59:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP58]], [[TMP59]] // CHECK2-NEXT: br i1 [[CMP16_I]], label 
[[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP61:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP62:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP61]], [[TMP62]] // CHECK2-NEXT: [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK2-NEXT: [[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK2-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP60]], [[CONV22_I]] // CHECK2-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK2-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP63]] to i64 -// CHECK2-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP67:%.*]] = load i32, 
i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP64:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP65:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP66:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP67:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP66]], [[TMP67]] // CHECK2-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK2-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK2-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP65]], [[CONV33_I]] -// CHECK2-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP68]], [[TMP69]] // CHECK2-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK2-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -1480,15 +1480,15 @@ struct S { // CHECK2-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP64]], [[MUL41_I]] // CHECK2-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK2-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK2-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK2-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !47, !llvm.access.group 
!48 +// CHECK2-NEXT: [[TMP70:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK2-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP70]], 1 -// CHECK2-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK2: omp.inner.for.end.i: // CHECK2-NEXT: br label [[TASKLOOP_IF_END_I]] // CHECK2: taskloop.if.end.i: -// CHECK2-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 +// CHECK2-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK2-NEXT: [[TMP72:%.*]] = icmp ne i32 [[TMP71]], 0 // CHECK2-NEXT: br i1 [[TMP72]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK2: .omp.lastprivate.then.i: @@ -1655,61 +1655,61 @@ struct S { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// 
CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !61 -// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i64 [[TMP13]], i64* 
[[DOTUB__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !62 +// CHECK2-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 // CHECK2-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK2-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !61 +// CHECK2-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !62 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK2-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 +// CHECK2-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 // CHECK2-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK2-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !61 -// CHECK2-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !61 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !61 +// CHECK2-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !62 +// CHECK2-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !62 +// CHECK2-NEXT: 
[[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !62 // CHECK2-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !61 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !62 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: taskloop.if.then.i: -// CHECK2-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !61 -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !61 +// CHECK2-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !62 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !61 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 +// CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 // CHECK2-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !61, !llvm.access.group !62 +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !62, !llvm.access.group !63 // CHECK2-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK2-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label 
[[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 -// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !61, !llvm.access.group !62 -// CHECK2-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !62 -// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 +// CHECK2-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 +// CHECK2-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !62, !llvm.access.group !63 +// CHECK2-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !63 +// CHECK2-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 // CHECK2-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK2-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !61, !llvm.access.group !62 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP63:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !62, !llvm.access.group !63 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP64:![0-9]+]] // CHECK2: omp.inner.for.end.i: // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK2: .omp_outlined..9.exit: @@ -1894,40 +1894,40 @@ struct S { // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META7:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32* 
[[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK3: 
omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: .omp_outlined..1.exit: // CHECK3-NEXT: ret i32 0 // @@ -2010,40 +2010,40 @@ struct S { // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !30 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !30 -// 
CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !30 -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i64 [[TMP13]], i64* 
[[DOTUB__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK3-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !30, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK3: omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK3-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK3-NEXT: [[TMP24:%.*]] 
= load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK3-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK3-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK3-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: .omp_outlined..3.exit: // CHECK3-NEXT: ret i32 0 // @@ -2218,38 +2218,38 @@ struct S { // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK3-NEXT: store i32 [[TMP31]], i32* 
[[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK3-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK3-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK3-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK3-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 @@ -2266,31 +2266,31 @@ struct S { // CHECK3-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[IDXPROM4_I]] // CHECK3-NEXT: [[TMP45:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK3-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP45]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK3-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP47]], [[TMP48]] // CHECK3-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK3-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK3-NEXT: 
[[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK3-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK3-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK3-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK3-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK3-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP50]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]] // CHECK3: land.lhs.true.i: -// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 // CHECK3-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP51]], [[TMP52]] // CHECK3-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]] // CHECK3: taskloop.if.then.i: -// CHECK3-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK3-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK3-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK3-NEXT: store i64 
[[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK3-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 // CHECK3-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 @@ -2302,32 +2302,32 @@ struct S { // CHECK3: omp_if.then.i: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP60]], [[TMP61]] // CHECK3-NEXT: br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK3: omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP63]], [[TMP64]] // CHECK3-NEXT: [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK3-NEXT: 
[[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK3-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP62]], [[CONV22_I]] // CHECK3-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK3-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP65]] to i64 -// CHECK3-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP68]], [[TMP69]] // CHECK3-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK3-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK3-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP67]], [[CONV33_I]] -// CHECK3-NEXT: [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: 
[[TMP71:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP70]], [[TMP71]] // CHECK3-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK3-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -2335,42 +2335,42 @@ struct S { // CHECK3-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP66]], [[MUL41_I]] // CHECK3-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK3-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK3-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: [[TMP72:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK3-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: [[TMP72:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK3-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP72]], 1 -// CHECK3-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK3: omp.inner.for.end.i: // CHECK3-NEXT: br label [[OMP_IF_END_I:%.*]] // CHECK3: omp_if.else.i: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND47_I:%.*]] // CHECK3: omp.inner.for.cond47.i: -// CHECK3-NEXT: [[TMP73:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP74:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias 
!46 +// CHECK3-NEXT: [[TMP73:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP74:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 // CHECK3-NEXT: [[CMP48_I:%.*]] = icmp ule i64 [[TMP73]], [[TMP74]] // CHECK3-NEXT: br i1 [[CMP48_I]], label [[OMP_INNER_FOR_BODY49_I:%.*]], label [[OMP_INNER_FOR_END82_I:%.*]] // CHECK3: omp.inner.for.body49.i: -// CHECK3-NEXT: [[TMP75:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP76:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP77:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP75:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP76:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP77:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[SUB50_I:%.*]] = sub i32 [[TMP76]], [[TMP77]] // CHECK3-NEXT: [[SUB51_I:%.*]] = sub i32 [[SUB50_I]], 1 // CHECK3-NEXT: [[CONV55_I:%.*]] = zext i32 [[SUB50_I]] to i64 // CHECK3-NEXT: [[DIV56_I:%.*]] = sdiv i64 [[TMP75]], [[CONV55_I]] // CHECK3-NEXT: [[CONV59_I:%.*]] = trunc i64 [[DIV56_I]] to i32 -// CHECK3-NEXT: store i32 [[CONV59_I]], i32* [[I14_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP78:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: store i32 [[CONV59_I]], i32* [[I14_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP78:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[CONV60_I:%.*]] = sext i32 [[TMP78]] to i64 -// CHECK3-NEXT: [[TMP79:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP80:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK3-NEXT: [[TMP81:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP82:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP79:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP80:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK3-NEXT: [[TMP81:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP82:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[SUB61_I:%.*]] = sub i32 [[TMP81]], [[TMP82]] // CHECK3-NEXT: [[SUB62_I:%.*]] = sub i32 [[SUB61_I]], 1 // CHECK3-NEXT: [[CONV66_I:%.*]] = zext i32 [[SUB61_I]] to i64 // CHECK3-NEXT: [[DIV67_I:%.*]] = sdiv i64 [[TMP80]], [[CONV66_I]] -// CHECK3-NEXT: [[TMP83:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP84:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP83:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP84:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK3-NEXT: [[SUB68_I:%.*]] = sub i32 [[TMP83]], [[TMP84]] // CHECK3-NEXT: [[SUB69_I:%.*]] = sub i32 [[SUB68_I]], 1 // CHECK3-NEXT: [[CONV73_I:%.*]] = zext i32 [[SUB68_I]] to i64 @@ -2378,17 +2378,17 @@ struct S { // CHECK3-NEXT: [[SUB75_I:%.*]] = sub nsw i64 [[TMP79]], [[MUL74_I]] // CHECK3-NEXT: [[ADD77_I:%.*]] = add nsw i64 [[CONV60_I]], [[SUB75_I]] // CHECK3-NEXT: [[CONV78_I:%.*]] = trunc i64 [[ADD77_I]] to i32 -// CHECK3-NEXT: store i32 [[CONV78_I]], i32* [[J15_I]], align 4, !noalias !46 -// CHECK3-NEXT: [[TMP85:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK3-NEXT: store i32 [[CONV78_I]], i32* [[J15_I]], align 4, !noalias !47 +// CHECK3-NEXT: [[TMP85:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK3-NEXT: [[ADD81_I:%.*]] = add nsw i64 [[TMP85]], 1 -// CHECK3-NEXT: store i64 [[ADD81_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND47_I]], 
!llvm.loop [[LOOP50:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD81_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND47_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK3: omp.inner.for.end82.i: // CHECK3-NEXT: br label [[OMP_IF_END_I]] // CHECK3: omp_if.end.i: // CHECK3-NEXT: br label [[TASKLOOP_IF_END_I]] // CHECK3: taskloop.if.end.i: -// CHECK3-NEXT: [[TMP86:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 +// CHECK3-NEXT: [[TMP86:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK3-NEXT: [[TMP87:%.*]] = icmp ne i32 [[TMP86]], 0 // CHECK3-NEXT: br i1 [[TMP87]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK3: .omp.lastprivate.then.i: @@ -2575,61 +2575,61 @@ struct S { // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK3-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// 
CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !63 -// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 +// CHECK3-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], 
align 8, !noalias !64 +// CHECK3-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK3-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK3-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !63 +// CHECK3-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 // CHECK3-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK3-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK3-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 -// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK3-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 +// CHECK3-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK3-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK3-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !63 -// CHECK3-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !63 -// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !63 +// CHECK3-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !64 +// CHECK3-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !64 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !64 // CHECK3-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], 
align 4, !noalias !64 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK3: taskloop.if.then.i: -// CHECK3-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !63 -// CHECK3-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 +// CHECK3-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !64 +// CHECK3-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK3-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 // CHECK3-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK3-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !63, !llvm.access.group !64 +// CHECK3-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !64, !llvm.access.group !65 // CHECK3-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK3-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK3: omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 -// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !63, !llvm.access.group !64 -// CHECK3-NEXT: 
store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !64 -// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !64, !llvm.access.group !65 +// CHECK3-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !65 +// CHECK3-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 // CHECK3-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK3-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP65:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK3: omp.inner.for.end.i: // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK3: .omp_outlined..9.exit: @@ -2794,40 +2794,40 @@ struct S { // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// 
CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP9]], i8** 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !13 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP24:%.*]] = 
load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !14 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: [[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK4-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: .omp_outlined..1.exit: // CHECK4-NEXT: ret i32 0 // @@ -2910,40 +2910,40 @@ struct S { // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !30 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i64 
[[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !30 -// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !30 -// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !30 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 
+// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP21]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK4-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK4-NEXT: [[CONV1_I:%.*]] = sext i32 [[TMP22]] to i64 -// CHECK4-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !30, !llvm.access.group !31 +// CHECK4-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV1_I]], [[TMP23]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK4-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK4-NEXT: store i32 [[TMP24]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 // CHECK4-NEXT: 
[[ADD2_I:%.*]] = add nsw i32 [[TMP25]], 1 -// CHECK4-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !30, !llvm.access.group !31 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK4: .omp_outlined..3.exit: // CHECK4-NEXT: ret i32 0 // @@ -3118,38 +3118,38 @@ struct S { // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !46 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 -// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !46 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !47 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !47 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**)* // CHECK4-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]]) #[[ATTR2]] // CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_2:%.*]], %struct.anon.2* [[TMP22]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP26]], align 8 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !46 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !47 // CHECK4-NEXT: [[TMP29:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP30:%.*]] = load i32*, i32** [[TMP29]], align 8 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP30]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK4-NEXT: store i32 [[TMP31]], i32* 
[[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK4-NEXT: [[TMP32:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP33:%.*]] = load i32*, i32** [[TMP32]], align 8 // CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[TMP33]], align 4 -// CHECK4-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: store i32 [[TMP34]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[TMP35:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 // CHECK4-NEXT: [[TMP36:%.*]] = load i8***, i8**** [[TMP35]], align 8 // CHECK4-NEXT: [[TMP37:%.*]] = load i8**, i8*** [[TMP36]], align 8 @@ -3166,31 +3166,31 @@ struct S { // CHECK4-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds i8, i8* [[TMP41]], i64 [[IDXPROM4_I]] // CHECK4-NEXT: [[TMP45:%.*]] = load i8, i8* [[ARRAYIDX5_I]], align 1 // CHECK4-NEXT: [[CONV_I:%.*]] = sext i8 [[TMP45]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK4-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[SUB8_I:%.*]] = sub i32 [[TMP47]], [[TMP48]] // CHECK4-NEXT: [[SUB9_I:%.*]] = sub i32 [[SUB8_I]], 1 // CHECK4-NEXT: [[CONV11_I:%.*]] = zext i32 [[SUB8_I]] to i64 // CHECK4-NEXT: 
[[MUL_I:%.*]] = mul nsw i64 [[CONV7_I]], [[CONV11_I]] // CHECK4-NEXT: [[SUB12_I:%.*]] = sub nsw i64 [[MUL_I]], 1 -// CHECK4-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK4-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !46 +// CHECK4-NEXT: store i64 [[SUB12_I]], i64* [[DOTCAPTURE_EXPR_6_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i32 0, i32* [[I_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK4-NEXT: store i32 [[TMP49]], i32* [[J_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !47 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP50]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[LAND_LHS_TRUE_I:%.*]], label [[TASKLOOP_IF_END_I:%.*]] // CHECK4: land.lhs.true.i: -// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP51:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 // CHECK4-NEXT: [[CMP13_I:%.*]] = icmp slt i32 [[TMP51]], [[TMP52]] // CHECK4-NEXT: br i1 [[CMP13_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[TASKLOOP_IF_END_I]] // CHECK4: taskloop.if.then.i: -// CHECK4-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !46 -// CHECK4-NEXT: store i64 [[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK4-NEXT: [[TMP53:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !47 +// CHECK4-NEXT: store i64 
[[TMP53]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK4-NEXT: [[TMP54:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP55:%.*]] = load i32*, i32** [[TMP54]], align 8 // CHECK4-NEXT: [[TMP56:%.*]] = getelementptr inbounds [[STRUCT_ANON_2]], %struct.anon.2* [[TMP22]], i32 0, i32 2 @@ -3202,32 +3202,32 @@ struct S { // CHECK4: omp_if.then.i: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP60:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP61:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[CMP16_I:%.*]] = icmp ule i64 [[TMP60]], [[TMP61]] // CHECK4-NEXT: br i1 [[CMP16_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP62:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP63:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP64:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[SUB17_I:%.*]] = sub i32 [[TMP63]], [[TMP64]] // CHECK4-NEXT: [[SUB18_I:%.*]] = sub i32 [[SUB17_I]], 1 // CHECK4-NEXT: 
[[CONV22_I:%.*]] = zext i32 [[SUB17_I]] to i64 // CHECK4-NEXT: [[DIV23_I:%.*]] = sdiv i64 [[TMP62]], [[CONV22_I]] // CHECK4-NEXT: [[CONV26_I:%.*]] = trunc i64 [[DIV23_I]] to i32 -// CHECK4-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: store i32 [[CONV26_I]], i32* [[I14_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP65:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[CONV27_I:%.*]] = sext i32 [[TMP65]] to i64 -// CHECK4-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP66:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP67:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP68:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP69:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[SUB28_I:%.*]] = sub i32 [[TMP68]], [[TMP69]] // CHECK4-NEXT: [[SUB29_I:%.*]] = sub i32 [[SUB28_I]], 1 // CHECK4-NEXT: [[CONV33_I:%.*]] = zext i32 [[SUB28_I]] to i64 // CHECK4-NEXT: [[DIV34_I:%.*]] = sdiv i64 [[TMP67]], [[CONV33_I]] -// CHECK4-NEXT: [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: 
[[TMP71:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: [[TMP70:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP71:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[SUB35_I:%.*]] = sub i32 [[TMP70]], [[TMP71]] // CHECK4-NEXT: [[SUB36_I:%.*]] = sub i32 [[SUB35_I]], 1 // CHECK4-NEXT: [[CONV40_I:%.*]] = zext i32 [[SUB35_I]] to i64 @@ -3235,42 +3235,42 @@ struct S { // CHECK4-NEXT: [[SUB42_I:%.*]] = sub nsw i64 [[TMP66]], [[MUL41_I]] // CHECK4-NEXT: [[ADD44_I:%.*]] = add nsw i64 [[CONV27_I]], [[SUB42_I]] // CHECK4-NEXT: [[CONV45_I:%.*]] = trunc i64 [[ADD44_I]] to i32 -// CHECK4-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: [[TMP72:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 +// CHECK4-NEXT: store i32 [[CONV45_I]], i32* [[J15_I]], align 4, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: [[TMP72:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 // CHECK4-NEXT: [[ADD46_I:%.*]] = add nsw i64 [[TMP72]], 1 -// CHECK4-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46, !llvm.access.group !47 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP48:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD46_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47, !llvm.access.group !48 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK4: omp.inner.for.end.i: // CHECK4-NEXT: br label [[OMP_IF_END_I:%.*]] // CHECK4: omp_if.else.i: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND47_I:%.*]] // CHECK4: omp.inner.for.cond47.i: -// CHECK4-NEXT: [[TMP73:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP74:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias 
!46 +// CHECK4-NEXT: [[TMP73:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP74:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !47 // CHECK4-NEXT: [[CMP48_I:%.*]] = icmp ule i64 [[TMP73]], [[TMP74]] // CHECK4-NEXT: br i1 [[CMP48_I]], label [[OMP_INNER_FOR_BODY49_I:%.*]], label [[OMP_INNER_FOR_END82_I:%.*]] // CHECK4: omp.inner.for.body49.i: -// CHECK4-NEXT: [[TMP75:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP76:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP77:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP75:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP76:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP77:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[SUB50_I:%.*]] = sub i32 [[TMP76]], [[TMP77]] // CHECK4-NEXT: [[SUB51_I:%.*]] = sub i32 [[SUB50_I]], 1 // CHECK4-NEXT: [[CONV55_I:%.*]] = zext i32 [[SUB50_I]] to i64 // CHECK4-NEXT: [[DIV56_I:%.*]] = sdiv i64 [[TMP75]], [[CONV55_I]] // CHECK4-NEXT: [[CONV59_I:%.*]] = trunc i64 [[DIV56_I]] to i32 -// CHECK4-NEXT: store i32 [[CONV59_I]], i32* [[I14_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP78:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: store i32 [[CONV59_I]], i32* [[I14_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP78:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[CONV60_I:%.*]] = sext i32 [[TMP78]] to i64 -// CHECK4-NEXT: [[TMP79:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP80:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK4-NEXT: [[TMP81:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP82:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP79:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP80:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK4-NEXT: [[TMP81:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP82:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[SUB61_I:%.*]] = sub i32 [[TMP81]], [[TMP82]] // CHECK4-NEXT: [[SUB62_I:%.*]] = sub i32 [[SUB61_I]], 1 // CHECK4-NEXT: [[CONV66_I:%.*]] = zext i32 [[SUB61_I]] to i64 // CHECK4-NEXT: [[DIV67_I:%.*]] = sdiv i64 [[TMP80]], [[CONV66_I]] -// CHECK4-NEXT: [[TMP83:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP84:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP83:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP84:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !47 // CHECK4-NEXT: [[SUB68_I:%.*]] = sub i32 [[TMP83]], [[TMP84]] // CHECK4-NEXT: [[SUB69_I:%.*]] = sub i32 [[SUB68_I]], 1 // CHECK4-NEXT: [[CONV73_I:%.*]] = zext i32 [[SUB68_I]] to i64 @@ -3278,17 +3278,17 @@ struct S { // CHECK4-NEXT: [[SUB75_I:%.*]] = sub nsw i64 [[TMP79]], [[MUL74_I]] // CHECK4-NEXT: [[ADD77_I:%.*]] = add nsw i64 [[CONV60_I]], [[SUB75_I]] // CHECK4-NEXT: [[CONV78_I:%.*]] = trunc i64 [[ADD77_I]] to i32 -// CHECK4-NEXT: store i32 [[CONV78_I]], i32* [[J15_I]], align 4, !noalias !46 -// CHECK4-NEXT: [[TMP85:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !46 +// CHECK4-NEXT: store i32 [[CONV78_I]], i32* [[J15_I]], align 4, !noalias !47 +// CHECK4-NEXT: [[TMP85:%.*]] = load i64, i64* [[DOTOMP_IV_I]], align 8, !noalias !47 // CHECK4-NEXT: [[ADD81_I:%.*]] = add nsw i64 [[TMP85]], 1 -// CHECK4-NEXT: store i64 [[ADD81_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !46 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND47_I]], 
!llvm.loop [[LOOP50:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD81_I]], i64* [[DOTOMP_IV_I]], align 8, !noalias !47 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND47_I]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK4: omp.inner.for.end82.i: // CHECK4-NEXT: br label [[OMP_IF_END_I]] // CHECK4: omp_if.end.i: // CHECK4-NEXT: br label [[TASKLOOP_IF_END_I]] // CHECK4: taskloop.if.end.i: -// CHECK4-NEXT: [[TMP86:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !46 +// CHECK4-NEXT: [[TMP86:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !47 // CHECK4-NEXT: [[TMP87:%.*]] = icmp ne i32 [[TMP86]], 0 // CHECK4-NEXT: br i1 [[TMP87]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__6_EXIT:%.*]] // CHECK4: .omp.lastprivate.then.i: @@ -3475,61 +3475,61 @@ struct S { // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK4-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// 
CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !63 -// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !64 +// CHECK4-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], 
align 8, !noalias !64 +// CHECK4-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK4-NEXT: [[TMP20:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON_4:%.*]], %struct.anon.4* [[TMP20]], i32 0, i32 0 // CHECK4-NEXT: [[TMP22:%.*]] = load %struct.S*, %struct.S** [[TMP21]], align 8 -// CHECK4-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !63 +// CHECK4-NEXT: store i32* [[TMP_I]], i32** [[TMP1_I]], align 8, !noalias !64 // CHECK4-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK4-NEXT: [[TMP24:%.*]] = load i32*, i32** [[TMP23]], align 8 // CHECK4-NEXT: [[TMP25:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 -// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK4-NEXT: store i32 [[TMP25]], i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 +// CHECK4-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !64 // CHECK4-NEXT: [[SUB3_I:%.*]] = sub nsw i32 [[TMP26]], 1 -// CHECK4-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !63 -// CHECK4-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !63 -// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !63 +// CHECK4-NEXT: store i32 [[SUB3_I]], i32* [[DOTCAPTURE_EXPR_2_I]], align 4, !noalias !64 +// CHECK4-NEXT: store i32* [[A_I]], i32** [[TMP4_I]], align 8, !noalias !64 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32*, i32** [[TMP4_I]], align 8, !noalias !64 // CHECK4-NEXT: store i32 0, i32* [[TMP27]], align 4 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], align 4, !noalias !63 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__I]], 
align 4, !noalias !64 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp slt i32 0, [[TMP28]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[TASKLOOP_IF_THEN_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK4: taskloop.if.then.i: -// CHECK4-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !63 -// CHECK4-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !63 +// CHECK4-NEXT: store i32* [[A5_I]], i32** [[TMP6_I]], align 8, !noalias !64 +// CHECK4-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !64 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64 // CHECK4-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON_4]], %struct.anon.4* [[TMP20]], i32 0, i32 1 // CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 // CHECK4-NEXT: [[CONV7_I:%.*]] = sext i32 [[TMP32]] to i64 -// CHECK4-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !63, !llvm.access.group !64 +// CHECK4-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !64, !llvm.access.group !65 // CHECK4-NEXT: [[CMP8_I:%.*]] = icmp ule i64 [[CONV7_I]], [[TMP33]] // CHECK4-NEXT: br i1 [[CMP8_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 -// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !63, !llvm.access.group !64 -// CHECK4-NEXT: 
store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !64 -// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[TMP6_I]], align 8, !noalias !64, !llvm.access.group !65 +// CHECK4-NEXT: store i32 [[TMP34]], i32* [[TMP35]], align 4, !llvm.access.group !65 +// CHECK4-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 // CHECK4-NEXT: [[ADD9_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK4-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !63, !llvm.access.group !64 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP65:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD9_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !64, !llvm.access.group !65 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK4: omp.inner.for.end.i: // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__9_EXIT]] // CHECK4: .omp_outlined..9.exit: diff --git a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp index cc7ec4c5d3c87..28b487ff22019 100644 --- a/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp +++ b/clang/test/OpenMP/parallel_master_taskloop_simd_lastprivate_codegen.cpp @@ -420,25 +420,25 @@ void loop() { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: 
call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 
x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 @@ -455,38 +455,38 @@ void loop() { // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 // CHECK1-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 // CHECK1-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 -// CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 
// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP43]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP42]], i64 0, i64 0 // CHECK1-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* // CHECK1-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[TMP40]] to i8* -// CHECK1-NEXT: call void 
@llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]], !llvm.access.group !14 -// CHECK1-NEXT: store i32 33, i32* [[TMP44]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]], !llvm.access.group !15 +// CHECK1-NEXT: store i32 33, i32* [[TMP44]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end.i: -// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK1-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK1: .omp.lastprivate.then.i: @@ -835,25 +835,25 @@ void loop() { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// 
CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* // CHECK1-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** 
[[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK1-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP22]], i32 0, i32 1 @@ -868,36 +868,36 @@ void loop() { // CHECK1-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 // CHECK1-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 -// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 +// CHECK1-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: 
[[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP43]] to i64 -// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !32, !llvm.access.group !33 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP44]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK1-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK1-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128, !llvm.access.group !33 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 -// CHECK1-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP40]], i64 0, i64 0 // CHECK1-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* // CHECK1-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* -// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]], 
!llvm.access.group !32 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK1-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]], !llvm.access.group !33 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end.i: -// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK1-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 // CHECK1-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 // CHECK1-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK1: .omp.lastprivate.then.i: @@ -1254,25 +1254,25 @@ void loop() { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], 
i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, %struct.S**, i32**, [2 x %struct.S]**, [2 x i32]**, i32**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], i32** [[DOTLASTPRIV_PTR_ADDR4_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 3 @@ -1289,38 +1289,38 @@ void loop() { // 
CHECK2-NEXT: [[TMP37:%.*]] = load %struct.S*, %struct.S** [[TMP36]], align 8 // CHECK2-NEXT: [[TMP38:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 4 // CHECK2-NEXT: [[TMP39:%.*]] = load i32*, i32** [[TMP38]], align 8 -// CHECK2-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP40:%.*]] = load %struct.S*, %struct.S** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP42:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP43:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR4_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP46:%.*]] = load 
i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CONV5_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV5_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[TMP41]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP43]], i64 0, i64 0 -// CHECK2-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: store i32 [[TMP49]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ARRAYIDX6_I:%.*]] = getelementptr inbounds [2 x %struct.S], [2 x %struct.S]* [[TMP42]], i64 0, i64 0 // CHECK2-NEXT: [[TMP50:%.*]] = bitcast %struct.S* [[ARRAYIDX6_I]] to i8* // CHECK2-NEXT: [[TMP51:%.*]] = bitcast %struct.S* [[TMP40]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]], !llvm.access.group !14 -// CHECK2-NEXT: store i32 33, i32* [[TMP44]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* 
[[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 16 [[TMP50]], i8* align 8 [[TMP51]], i64 8, i1 false) #[[ATTR4]], !llvm.access.group !15 +// CHECK2-NEXT: store i32 33, i32* [[TMP44]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end.i: -// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP53:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP54:%.*]] = icmp ne i32 [[TMP53]], 0 // CHECK2-NEXT: br i1 [[TMP54]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK2: .omp.lastprivate.then.i: @@ -1669,25 +1669,25 @@ void loop() { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 64 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.3*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* @.omp_task_privates_map..4 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 // CHECK2-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, i32**, [2 x i32]**, [2 x %struct.S.0]**, %struct.S.0**)* // CHECK2-NEXT: call void [[TMP25]](i8* [[TMP24]], i32** [[DOTLASTPRIV_PTR_ADDR_I]], [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]]) #[[ATTR4]] // CHECK2-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], 
%struct.anon.1* [[TMP22]], i32 0, i32 1 @@ -1702,36 +1702,36 @@ void loop() { // CHECK2-NEXT: [[TMP35:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP34]], align 8 // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT_ANON_1]], %struct.anon.1* [[TMP22]], i32 0, i32 3 // CHECK2-NEXT: [[TMP37:%.*]] = load %struct.S.0*, %struct.S.0** [[TMP36]], align 8 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP39:%.*]] = load [2 x i32]*, [2 x i32]** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP40:%.*]] = load [2 x %struct.S.0]*, [2 x %struct.S.0]** [[DOTLASTPRIV_PTR_ADDR2_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP41:%.*]] = load %struct.S.0*, %struct.S.0** [[DOTLASTPRIV_PTR_ADDR3_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP42:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !32 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP42]] to i32 -// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31 +// CHECK2-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 // 
CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP43]] to i64 -// CHECK2-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP44:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !32, !llvm.access.group !33 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP44]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK2-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP45:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK2-NEXT: store i32 [[TMP45]], i32* [[I_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[TMP38]], align 128, !llvm.access.group !33 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds [2 x i32], [2 x i32]* [[TMP39]], i64 0, i64 0 -// CHECK2-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: store i32 [[TMP46]], i32* [[ARRAYIDX_I]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[ARRAYIDX5_I:%.*]] = getelementptr inbounds [2 x %struct.S.0], [2 x %struct.S.0]* [[TMP40]], i64 0, i64 0 // CHECK2-NEXT: [[TMP47:%.*]] = bitcast %struct.S.0* [[ARRAYIDX5_I]] to i8* // CHECK2-NEXT: [[TMP48:%.*]] = bitcast %struct.S.0* [[TMP41]] to i8* -// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 [[TMP48]], i64 4, i1 false) #[[ATTR4]], !llvm.access.group !32 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 +// CHECK2-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP47]], i8* align 4 
[[TMP48]], i64 4, i1 false) #[[ATTR4]], !llvm.access.group !33 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP49]], 1 -// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !31, !llvm.access.group !32 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !32, !llvm.access.group !33 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end.i: -// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !31 +// CHECK2-NEXT: [[TMP50:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !32 // CHECK2-NEXT: [[TMP51:%.*]] = icmp ne i32 [[TMP50]], 0 // CHECK2-NEXT: br i1 [[TMP51]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK2: .omp.lastprivate.then.i: @@ -1983,59 +1983,59 @@ void loop() { // CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK3-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* 
[[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store void (i8*, 
...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK3-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK3-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR3:[0-9]+]] // CHECK3-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK3-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 // CHECK3-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK3-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK3-NEXT: [[TMP32:%.*]] = load i64, i64* 
[[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK3-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK3-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP32]] to i32 -// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK3-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK3: omp.inner.for.cond.i: -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP34]] // CHECK3-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK3: omp.inner.for.body.i: -// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK3-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8, !llvm.access.group !14 -// CHECK3-NEXT: store i32 11, i32* [[TMP31]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK3-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK3-NEXT: store double 1.000000e+00, 
double* [[TMP30]], align 8, !llvm.access.group !15 +// CHECK3-NEXT: store i32 11, i32* [[TMP31]], align 4, !llvm.access.group !15 // CHECK3-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 0 -// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK3-NEXT: store double* [[TMP30]], double** [[TMP36]], align 8, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: [[TMP37:%.*]] = getelementptr inbounds [[CLASS_ANON_0]], %class.anon.0* [[REF_TMP_I]], i32 0, i32 1 -// CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !13, !llvm.access.group !14 -// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]], !llvm.access.group !14 -// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK3-NEXT: store i32* [[TMP31]], i32** [[TMP37]], align 8, !noalias !14, !llvm.access.group !15 +// CHECK3-NEXT: call void @"_ZZZ4mainENK3$_0clEvENKUlvE_clEv"(%class.anon.0* nonnull align 8 dereferenceable(16) [[REF_TMP_I]]) #[[ATTR3]], !llvm.access.group !15 +// CHECK3-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK3-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 -// CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK3: omp.inner.for.end.i: -// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK3-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK3-NEXT: 
[[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 // CHECK3-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK3: .omp.lastprivate.then.i: @@ -2212,77 +2212,77 @@ void loop() { // CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK4-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, double**, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK4-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP23:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[TMP25:%.*]] = bitcast void (i8*, ...)* [[TMP23]] to void (i8*, double**, i32**)* // CHECK4-NEXT: call void [[TMP25]](i8* [[TMP24]], double** [[DOTLASTPRIV_PTR_ADDR_I]], i32** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR4:[0-9]+]] // CHECK4-NEXT: [[TMP26:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK4-NEXT: [[TMP27:%.*]] = load double*, double** [[TMP26]], align 8 // CHECK4-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK4-NEXT: [[TMP29:%.*]] = load i32*, i32** [[TMP28]], align 8 -// CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK4-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK4-NEXT: [[TMP30:%.*]] = load double*, double** 
[[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK4-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK4-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP32]] to i32 -// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK4-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK4: omp.inner.for.cond.i: -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP33]] to i64 -// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP34]] // CHECK4-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK4: omp.inner.for.body.i: -// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK4-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK4-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8, !llvm.access.group !14 -// CHECK4-NEXT: store i32 11, i32* [[TMP31]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP35]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK4-NEXT: store double 1.000000e+00, double* [[TMP30]], align 8, !llvm.access.group !15 +// CHECK4-NEXT: store i32 11, i32* [[TMP31]], 
align 4, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_ISA_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 0 -// CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: store i8* bitcast (i8** @_NSConcreteStackBlock to i8*), i8** [[BLOCK_ISA_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_FLAGS_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 1 -// CHECK4-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: store i32 1073741824, i32* [[BLOCK_FLAGS_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_RESERVED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 2 -// CHECK4-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: store i32 0, i32* [[BLOCK_RESERVED_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_INVOKE_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 3 -// CHECK4-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: store i8* bitcast (void (i8*)* @_block_invoke to i8*), i8** [[BLOCK_INVOKE_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_DESCRIPTOR_I:%.*]] = getelementptr inbounds <{ 
i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 4 -// CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: store %struct.__block_descriptor* bitcast ({ i64, i64, i8*, i8* }* @__block_descriptor_tmp.2 to %struct.__block_descriptor*), %struct.__block_descriptor** [[BLOCK_DESCRIPTOR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_CAPTURED_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 5 -// CHECK4-NEXT: [[TMP36:%.*]] = load volatile double, double* [[TMP30]], align 8, !llvm.access.group !14 -// CHECK4-NEXT: store volatile double [[TMP36]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP36:%.*]] = load volatile double, double* [[TMP30]], align 8, !llvm.access.group !15 +// CHECK4-NEXT: store volatile double [[TMP36]], double* [[BLOCK_CAPTURED_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[BLOCK_CAPTURED3_I:%.*]] = getelementptr inbounds <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>, <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]], i32 0, i32 6 -// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP31]], align 4, !llvm.access.group !14 -// CHECK4-NEXT: store i32 [[TMP37]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP31]], align 4, !llvm.access.group !15 +// CHECK4-NEXT: store i32 [[TMP37]], i32* [[BLOCK_CAPTURED3_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[TMP38:%.*]] 
= bitcast <{ i8*, i32, i32, i8*, %struct.__block_descriptor*, double, i32 }>* [[BLOCK_I]] to void ()* // CHECK4-NEXT: [[BLOCK_LITERAL_I:%.*]] = bitcast void ()* [[TMP38]] to %struct.__block_literal_generic* // CHECK4-NEXT: [[TMP39:%.*]] = getelementptr inbounds [[STRUCT___BLOCK_LITERAL_GENERIC:%.*]], %struct.__block_literal_generic* [[BLOCK_LITERAL_I]], i32 0, i32 3 // CHECK4-NEXT: [[TMP40:%.*]] = bitcast %struct.__block_literal_generic* [[BLOCK_LITERAL_I]] to i8* -// CHECK4-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP39]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP41:%.*]] = load i8*, i8** [[TMP39]], align 8, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[TMP42:%.*]] = bitcast i8* [[TMP41]] to void (i8*)* -// CHECK4-NEXT: call void [[TMP42]](i8* [[TMP40]]) #[[ATTR4]], !llvm.access.group !14 -// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK4-NEXT: call void [[TMP42]](i8* [[TMP40]]) #[[ATTR4]], !llvm.access.group !15 +// CHECK4-NEXT: [[TMP43:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK4-NEXT: [[ADD4_I:%.*]] = add nsw i32 [[TMP43]], 1 -// CHECK4-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK4: omp.inner.for.end.i: -// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK4-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK4-NEXT: [[TMP45:%.*]] = icmp ne i32 [[TMP44]], 0 // CHECK4-NEXT: br i1 [[TMP45]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK4: 
.omp.lastprivate.then.i: @@ -2446,54 +2446,54 @@ void loop() { // CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK5-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, float***, %struct.St***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK5-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK5-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, float***, %struct.St***)* // CHECK5-NEXT: call void [[TMP27]](i8* [[TMP26]], float*** [[DOTLASTPRIV_PTR_ADDR_I]], %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]]) #[[ATTR2:[0-9]+]] // CHECK5-NEXT: [[TMP28:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK5-NEXT: [[TMP29:%.*]] = load float**, float*** [[TMP28]], align 8 // CHECK5-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 2 // CHECK5-NEXT: 
[[TMP31:%.*]] = load %struct.St**, %struct.St*** [[TMP30]], align 8 -// CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 -// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK5-NEXT: [[TMP32:%.*]] = load float**, float*** [[DOTLASTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP33:%.*]] = load %struct.St**, %struct.St*** [[DOTLASTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 +// CHECK5-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK5-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP34]] to i32 -// CHECK5-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK5-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK5: omp.inner.for.cond.i: -// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP35:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK5-NEXT: [[CONV2_I:%.*]] = sext i32 [[TMP35]] to i64 -// CHECK5-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP36:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK5-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV2_I]], [[TMP36]] // CHECK5-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK5: omp.inner.for.body.i: -// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK5-NEXT: store i32 [[TMP37]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, 
!noalias !13, !llvm.access.group !14 +// CHECK5-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK5-NEXT: store i32 [[TMP37]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK5-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK5-NEXT: [[ADD3_I:%.*]] = add nsw i32 [[TMP38]], 1 -// CHECK5-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK5: omp.inner.for.end.i: -// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK5-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK5-NEXT: [[TMP40:%.*]] = icmp ne i32 [[TMP39]], 0 // CHECK5-NEXT: br i1 [[TMP40]], label [[DOTOMP_LASTPRIVATE_THEN_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK5: .omp.lastprivate.then.i: @@ -2625,65 +2625,65 @@ void loop() { // CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[TMP16]], align 8 // CHECK6-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK6-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK6-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK6-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK6-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 
8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP11]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP13]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i64 [[TMP15]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store i32 [[TMP17]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK6-NEXT: store i8* [[TMP19]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK6-NEXT: [[TMP20:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP20]], i32 0, i32 0 // CHECK6-NEXT: [[TMP22:%.*]] = load i32*, i32** [[TMP21]], align 8 // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[TMP22]], align 4 -// CHECK6-NEXT: store i32 [[TMP23]], i32* [[DOTLINEAR_START_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 [[TMP23]], i32* [[DOTLINEAR_START_I]], align 4, !noalias !14 // CHECK6-NEXT: [[TMP24:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 1 // CHECK6-NEXT: [[TMP25:%.*]] = load i32*, i32** [[TMP24]], align 8 // CHECK6-NEXT: [[TMP26:%.*]] = load i32, i32* [[TMP25]], align 4 -// CHECK6-NEXT: store i32 [[TMP26]], i32* [[DOTLINEAR_START1_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 [[TMP26]], i32* [[DOTLINEAR_START1_I]], align 4, !noalias !14 // CHECK6-NEXT: [[TMP27:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP20]], i32 0, i32 0 // CHECK6-NEXT: [[TMP28:%.*]] = load i32*, i32** [[TMP27]], align 8 -// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK6-NEXT: [[CONV_I:%.*]] = trunc i64 [[TMP29]] to i32 -// CHECK6-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, 
!noalias !13 +// CHECK6-NEXT: store i32 [[CONV_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK6: omp.inner.for.cond.i: -// CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK6-NEXT: [[CONV3_I:%.*]] = sext i32 [[TMP30]] to i64 -// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK6-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV3_I]], [[TMP31]] // CHECK6-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[OMP_INNER_FOR_END_I:%.*]] // CHECK6: omp.inner.for.body.i: -// CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: store i32 [[TMP32]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTLINEAR_START1_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: store i32 [[TMP32]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTLINEAR_START1_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK6-NEXT: [[ADD5_I:%.*]] = add nsw i32 [[TMP33]], [[TMP34]] -// CHECK6-NEXT: store i32 [[ADD5_I]], i32* [[J_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP35:%.*]] = load i32, i32* [[J_I]], align 
4, !noalias !13, !llvm.access.group !14 +// CHECK6-NEXT: store i32 [[ADD5_I]], i32* [[J_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP35:%.*]] = load i32, i32* [[J_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK6-NEXT: [[INC_I:%.*]] = add nsw i32 [[TMP35]], 1 -// CHECK6-NEXT: store i32 [[INC_I]], i32* [[J_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK6-NEXT: store i32 [[INC_I]], i32* [[J_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: [[TMP36:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK6-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP36]], 1 -// CHECK6-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD6_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK6: omp.inner.for.end.i: -// CHECK6-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 +// CHECK6-NEXT: [[TMP37:%.*]] = load i32, i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 // CHECK6-NEXT: [[TMP38:%.*]] = icmp ne i32 [[TMP37]], 0 // CHECK6-NEXT: br i1 [[TMP38]], label [[DOTOMP_LINEAR_PU_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] // CHECK6: .omp.linear.pu.i: -// CHECK6-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLINEAR_START1_I]], align 4, !noalias !13 +// CHECK6-NEXT: [[TMP39:%.*]] = load i32, i32* [[DOTLINEAR_START1_I]], align 4, !noalias !14 // CHECK6-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP39]], 10 -// CHECK6-NEXT: store i32 [[ADD7_I]], i32* [[J_I]], align 4, !noalias !13 +// CHECK6-NEXT: store i32 [[ADD7_I]], i32* [[J_I]], align 4, !noalias !14 // CHECK6-NEXT: br label 
[[DOTOMP_OUTLINED__1_EXIT]] // CHECK6: .omp_outlined..1.exit: // CHECK6-NEXT: ret i32 0 diff --git a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp index 190e639313665..88784908cf617 100644 --- a/clang/test/OpenMP/parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_reduction_task_codegen.cpp @@ -398,26 +398,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -451,8 +451,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -868,26 +868,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -921,8 +921,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -974,4 +974,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp index 6db52d91af866..ba58ee0a4f803 100644 --- a/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/parallel_sections_reduction_task_codegen.cpp @@ -440,26 +440,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -493,8 +493,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -952,26 +952,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1005,8 +1005,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1058,4 +1058,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c index 31ee32fb2180d..8b45d4dc789e5 100644 --- a/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c +++ b/clang/test/OpenMP/remarks_parallel_in_multiple_target_state_machines.c @@ -1,17 +1,13 @@ -// XFAIL: * -// Failure is expected until fixed in LLORG upstream. -// TODO: remove XFAIL once the test starts to pass. - -// RUN: %clang_cc1 -verify=host -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out -// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -verify=host -Rpass=openmp-opt -Rpass-analysis=openmp-opt -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify=all,safe -Rpass=openmp-opt 
-Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify=all,safe -Rpass=openmp-opt -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out // host-no-diagnostics void bar1(void) { #pragma omp parallel // #0 // all-remark@#0 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // safe-remark@#0 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}} + // safe-remark@#0 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} // force-remark@#0 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__2_wrapper, kernel ID: }} { } @@ -19,7 +15,7 @@ void bar1(void) { void bar2(void) { #pragma omp parallel // #1 // all-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} - // safe-remark@#1 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}} + // safe-remark@#1 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} // force-remark@#1 {{Specialize parallel region that is only reached from a single target region to avoid spurious call edges and excessive register usage in other target regions. (parallel region ID: __omp_outlined__6_wrapper, kernel ID: }} { } diff --git a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c index df3a18e0850b7..ca6a9afa3b3f5 100644 --- a/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c +++ b/clang/test/OpenMP/remarks_parallel_in_target_state_machine.c @@ -1,17 +1,13 @@ -// XFAIL: * -// Failure is expected until fixed in LLORG upstream. -// TODO: remove XFAIL once the test starts to pass. 
- -// RUN: %clang_cc1 -verify=host -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc -// RUN: %clang_cc1 -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out -// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -verify=host -Rpass=openmp-opt -Rpass-analysis=openmp-opt -fopenmp -x c++ -triple powerpc64le-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm-bc %s -o %t-ppc-host.bc +// RUN: %clang_cc1 -verify -Rpass=openmp-opt -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out +// RUN: %clang_cc1 -fexperimental-new-pass-manager -verify -Rpass=openmp-opt -Rpass-analysis=openmp-opt -fopenmp -O2 -x c++ -triple nvptx64-unknown-unknown -fopenmp-targets=nvptx64-nvidia-cuda -emit-llvm %s -fopenmp-is-device -fopenmp-host-ir-file-path %t-ppc-host.bc -o %t.out // host-no-diagnostics void bar(void) { #pragma omp parallel // #1 \ // expected-remark@#1 {{Found a parallel region that is called in a target region but not part of a combined target construct nor nested inside a target construct without intermediate code. 
This can lead to excessive register usage for unrelated target regions in the same translation unit due to spurious call edges assumed by ptxas.}} \ - // expected-remark@#1 {{Parallel region is used in unexpected ways; will not attempt to rewrite the state machine.}} + // expected-remark@#1 {{Parallel region is used in unknown ways; will not attempt to rewrite the state machine.}} { } } diff --git a/clang/test/OpenMP/schedule_codegen.cpp b/clang/test/OpenMP/schedule_codegen.cpp index 57c0df3cfa44e..fa344b6a116f4 100644 --- a/clang/test/OpenMP/schedule_codegen.cpp +++ b/clang/test/OpenMP/schedule_codegen.cpp @@ -10,7 +10,7 @@ int main() { #pragma omp for for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_for_static_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group // CHECK: @__kmpc_for_static_fini #pragma omp for simd for(int i = 0; i < 10; ++i); @@ -20,7 +20,7 @@ int main() { #pragma omp for schedule(static) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_for_static_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group // CHECK: @__kmpc_for_static_fini #pragma omp for simd schedule(static) for(int i = 0; i < 10; ++i); @@ -30,7 +30,7 @@ int main() { #pragma omp for schedule(static, 2) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_for_static_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group // CHECK: @__kmpc_for_static_fini #pragma omp for simd schedule(static, 2) for(int i = 0; i < 10; ++i); @@ -72,7 +72,7 @@ int main() { #pragma omp for schedule(monotonic: static) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_for_static_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group // CHECK: @__kmpc_for_static_fini #pragma omp for simd schedule(monotonic: static) for(int i = 0; i < 10; ++i); @@ -82,7 +82,7 @@ int main() { #pragma omp for schedule(monotonic: static, 2) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_for_static_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group // CHECK: 
@__kmpc_for_static_fini #pragma omp for simd schedule(monotonic: static, 2) for(int i = 0; i < 10; ++i); @@ -91,7 +91,7 @@ int main() { #pragma omp for schedule(monotonic: auto) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group #pragma omp for simd schedule(monotonic: auto) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init @@ -99,7 +99,7 @@ int main() { #pragma omp for schedule(monotonic: runtime) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group #pragma omp for simd schedule(monotonic: runtime) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init @@ -107,7 +107,7 @@ int main() { #pragma omp for schedule(monotonic: guided) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group #pragma omp for simd schedule(monotonic: guided) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init @@ -115,7 +115,7 @@ int main() { #pragma omp for schedule(monotonic: dynamic) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init -// CHECK-NOT: !llvm.access.group +// CHECK: !llvm.access.group #pragma omp for simd schedule(monotonic: dynamic) for(int i = 0; i < 10; ++i); // CHECK: @__kmpc_dispatch_init diff --git a/clang/test/OpenMP/sections_reduction_task_codegen.cpp b/clang/test/OpenMP/sections_reduction_task_codegen.cpp index 9335ae4fb8712..34abfd05b887c 100644 --- a/clang/test/OpenMP/sections_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/sections_reduction_task_codegen.cpp @@ -445,26 +445,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* 
[[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -498,8 +498,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: 
[[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 [[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -961,26 +961,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5:[0-9]+]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1014,8 +1014,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], 
i64 [[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1067,4 +1067,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_parallel_codegen.cpp b/clang/test/OpenMP/target_parallel_codegen.cpp index e951059304604..d98a4432a1cb9 100644 --- a/clang/test/OpenMP/target_parallel_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_codegen.cpp @@ -566,17 +566,17 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP10:%.*]] = 
load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -654,7 +654,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK1-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK1: .cancel.exit: -// CHECK1-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK1-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* 
@[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK1-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK1: .cancel.continue: // CHECK1-NEXT: ret void @@ -1315,7 +1315,7 @@ int bar(int n){ // // // CHECK1-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK1-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK1-SAME: () #[[ATTR6:[0-9]+]] { // CHECK1-NEXT: entry: // CHECK1-NEXT: call void @__tgt_register_requires(i64 1) // CHECK1-NEXT: ret void @@ -1596,17 +1596,17 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -1684,7 +1684,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK2-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK2: .cancel.exit: -// CHECK2-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK2-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK2-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK2: .cancel.continue: // CHECK2-NEXT: ret void @@ -2345,7 +2345,7 @@ int bar(int n){ // // // CHECK2-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK2-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK2-SAME: () 
#[[ATTR6:[0-9]+]] { // CHECK2-NEXT: entry: // CHECK2-NEXT: call void @__tgt_register_requires(i64 1) // CHECK2-NEXT: ret void @@ -2623,17 +2623,17 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias 
!22 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK3-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -2708,7 +2708,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK3-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK3: .cancel.exit: -// CHECK3-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK3-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK3-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK3: .cancel.continue: // CHECK3-NEXT: ret void @@ -3351,7 +3351,7 @@ int bar(int n){ // // // CHECK3-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK3-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK3-SAME: () #[[ATTR6:[0-9]+]] { // CHECK3-NEXT: entry: // CHECK3-NEXT: call void @__tgt_register_requires(i64 1) // CHECK3-NEXT: ret void @@ -3629,17 +3629,17 @@ int bar(int n){ // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast 
%struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], 
%struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK4-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK4-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -3714,7 +3714,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK4-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK4: .cancel.exit: -// CHECK4-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK4-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK4-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK4: .cancel.continue: // CHECK4-NEXT: ret void @@ -4357,7 +4357,7 @@ int bar(int n){ // // // CHECK4-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK4-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK4-SAME: () #[[ATTR6:[0-9]+]] { // CHECK4-NEXT: entry: // CHECK4-NEXT: call void @__tgt_register_requires(i64 1) // CHECK4-NEXT: ret void @@ -4416,7 +4416,7 @@ int bar(int n){ // CHECK9-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK9-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK9: .cancel.exit: -// CHECK9-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK9-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK9-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK9: .cancel.continue: // CHECK9-NEXT: ret void @@ 
-4830,7 +4830,7 @@ int bar(int n){ // CHECK10-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK10-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK10: .cancel.exit: -// CHECK10-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK10-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK10-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK10: .cancel.continue: // CHECK10-NEXT: ret void @@ -5244,7 +5244,7 @@ int bar(int n){ // CHECK11-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK11-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK11: .cancel.exit: -// CHECK11-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK11-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK11-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK11: .cancel.continue: // CHECK11-NEXT: ret void @@ -5643,7 +5643,7 @@ int bar(int n){ // CHECK12-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK12-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK12: .cancel.exit: -// CHECK12-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK12-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK12-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK12: .cancel.continue: // CHECK12-NEXT: ret void @@ -6264,17 +6264,17 @@ int bar(int n){ // CHECK17-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK17-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK17-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META11:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP10:%.*]] = load 
%struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK17-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK17-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK17-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -6352,7 +6352,7 @@ int bar(int n){ // CHECK17-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK17-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK17: .cancel.exit: -// CHECK17-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK17-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK17-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK17: .cancel.continue: // CHECK17-NEXT: ret void @@ -7013,7 +7013,7 @@ int bar(int n){ // // // CHECK17-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK17-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK17-SAME: () #[[ATTR6:[0-9]+]] { // CHECK17-NEXT: entry: // CHECK17-NEXT: call void @__tgt_register_requires(i64 1) // CHECK17-NEXT: ret void @@ -7294,17 +7294,17 @@ int bar(int n){ // CHECK18-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK18-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK18-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META18:![0-9]+]]) -// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 // CHECK18-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, 
i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK18-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK18-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -7382,7 +7382,7 @@ int bar(int n){ // CHECK18-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK18-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK18: .cancel.exit: -// CHECK18-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK18-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK18-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK18: .cancel.continue: // CHECK18-NEXT: ret void @@ -8043,7 +8043,7 @@ int bar(int n){ // // // CHECK18-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK18-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK18-SAME: () #[[ATTR6:[0-9]+]] { // CHECK18-NEXT: entry: // CHECK18-NEXT: call void @__tgt_register_requires(i64 1) // CHECK18-NEXT: ret void @@ -8321,17 +8321,17 @@ int bar(int n){ // CHECK19-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK19-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK19-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// 
CHECK19-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK19-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK19-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK19-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -8406,7 +8406,7 @@ int bar(int n){ // CHECK19-NEXT: [[TMP4:%.*]] 
= icmp ne i32 [[TMP3]], 0 // CHECK19-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK19: .cancel.exit: -// CHECK19-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK19-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK19-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK19: .cancel.continue: // CHECK19-NEXT: ret void @@ -9049,7 +9049,7 @@ int bar(int n){ // // // CHECK19-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK19-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK19-SAME: () #[[ATTR6:[0-9]+]] { // CHECK19-NEXT: entry: // CHECK19-NEXT: call void @__tgt_register_requires(i64 1) // CHECK19-NEXT: ret void @@ -9327,17 +9327,17 @@ int bar(int n){ // CHECK20-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK20-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK20-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias 
!21 -// CHECK20-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 // CHECK20-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB1]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l100.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK20-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK20-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -9412,7 +9412,7 @@ int bar(int n){ // CHECK20-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK20-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK20: .cancel.exit: -// CHECK20-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK20-NEXT: [[TMP5:%.*]] = 
call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK20-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK20: .cancel.continue: // CHECK20-NEXT: ret void @@ -10055,7 +10055,7 @@ int bar(int n){ // // // CHECK20-LABEL: define {{[^@]+}}@.omp_offloading.requires_reg -// CHECK20-SAME: () #[[ATTR5:[0-9]+]] { +// CHECK20-SAME: () #[[ATTR6:[0-9]+]] { // CHECK20-NEXT: entry: // CHECK20-NEXT: call void @__tgt_register_requires(i64 1) // CHECK20-NEXT: ret void @@ -10114,7 +10114,7 @@ int bar(int n){ // CHECK25-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK25-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK25: .cancel.exit: -// CHECK25-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK25-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK25-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK25: .cancel.continue: // CHECK25-NEXT: ret void @@ -10528,7 +10528,7 @@ int bar(int n){ // CHECK26-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK26-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK26: .cancel.exit: -// CHECK26-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK26-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK26-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK26: .cancel.continue: // CHECK26-NEXT: ret void @@ -10942,7 +10942,7 @@ int bar(int n){ // CHECK27-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK27-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK27: .cancel.exit: -// CHECK27-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK27-NEXT: [[TMP5:%.*]] = call i32 
@__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK27-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK27: .cancel.continue: // CHECK27-NEXT: ret void @@ -11341,7 +11341,7 @@ int bar(int n){ // CHECK28-NEXT: [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0 // CHECK28-NEXT: br i1 [[TMP4]], label [[DOTCANCEL_EXIT:%.*]], label [[DOTCANCEL_CONTINUE:%.*]] // CHECK28: .cancel.exit: -// CHECK28-NEXT: [[TMP10:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) +// CHECK28-NEXT: [[TMP5:%.*]] = call i32 @__kmpc_cancel_barrier(%struct.ident_t* @[[GLOB2:[0-9]+]], i32 [[TMP2]]) // CHECK28-NEXT: br label [[DOTCANCEL_CONTINUE]] // CHECK28: .cancel.continue: // CHECK28-NEXT: ret void @@ -11686,4 +11686,3 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: ret void // -// diff --git a/clang/test/OpenMP/target_parallel_debug_codegen.cpp b/clang/test/OpenMP/target_parallel_debug_codegen.cpp index 007aa59b30d23..8734c45f05cb8 100644 --- a/clang/test/OpenMP/target_parallel_debug_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_debug_codegen.cpp @@ -97,7 +97,6 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG45]] // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG45]] // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG45]] -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd(), !dbg [[DBG45]] // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG45]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP8:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1:[0-9]+]]) @@ -321,7 +320,6 @@ int main() { // CHECK1-NEXT: [[TMP8:%.*]] = load i8*, i8** [[_TMP2]], align 8, !dbg [[DBG135]] // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG135]] // CHECK1-NEXT: call void 
@__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG135]] -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd(), !dbg [[DBG135]] // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG135]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP9:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB3:[0-9]+]]) @@ -539,7 +537,6 @@ int main() { // CHECK1-NEXT: [[TMP11:%.*]] = load i8*, i8** [[_TMP3]], align 8, !dbg [[DBG210]] // CHECK1-NEXT: [[NVPTX_NUM_THREADS:%.*]] = call i32 @llvm.nvvm.read.ptx.sreg.ntid.x(), !dbg [[DBG210]] // CHECK1-NEXT: call void @__kmpc_spmd_kernel_init(i32 [[NVPTX_NUM_THREADS]], i16 1), !dbg [[DBG210]] -// CHECK1-NEXT: call void @__kmpc_data_sharing_init_stack_spmd(), !dbg [[DBG210]] // CHECK1-NEXT: br label [[DOTEXECUTE:%.*]], !dbg [[DBG210]] // CHECK1: .execute: // CHECK1-NEXT: [[TMP12:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB5:[0-9]+]]) diff --git a/clang/test/OpenMP/target_parallel_for_codegen.cpp b/clang/test/OpenMP/target_parallel_for_codegen.cpp index 1c798f0885c82..4a3e7113073b5 100644 --- a/clang/test/OpenMP/target_parallel_for_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_codegen.cpp @@ -787,32 +787,32 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1042,25 +1042,25 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -1072,16 +1072,16 @@ int bar(int n){ // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK1-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK1-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[LIN_CASTED_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: [[CONV5_I:%.*]] = bitcast i64* [[A_CASTED_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !24 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !24 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i64 [[TMP28]], i64 [[TMP30]], i64 [[TMP32]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK1: .omp_outlined..3.exit: @@ -2436,32 +2436,32 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !11 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 
[[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2691,25 +2691,25 @@ int bar(int n){ // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP10]], i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -2721,16 +2721,16 @@ int bar(int n){ // CHECK2: omp_offload.failed.i: // CHECK2-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK2-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK2-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK2-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK2-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[LIN_CASTED_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK2-NEXT: [[CONV5_I:%.*]] = bitcast i64* [[A_CASTED_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !24 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !24 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i64 [[TMP28]], i64 [[TMP30]], i64 [[TMP32]]) #[[ATTR3]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK2: .omp_outlined..3.exit: @@ -4070,32 +4070,32 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD3:%.*]] = add 
nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -4319,25 +4319,25 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* 
[[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -4349,14 +4349,14 @@ int bar(int n){ // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK3-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK3-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK3-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !25 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i32 [[TMP28]], i32 [[TMP30]], i32 [[TMP32]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK3: .omp_outlined..3.exit: @@ -5675,32 +5675,32 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 
-// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK4-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK4-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: 
store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -5924,25 +5924,25 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void 
(%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -5954,14 +5954,14 @@ int bar(int n){ // CHECK4: omp_offload.failed.i: // CHECK4-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK4-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK4-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK4-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK4-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !25 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i32 [[TMP28]], i32 [[TMP30]], i32 [[TMP32]]) #[[ATTR3]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK4: .omp_outlined..3.exit: @@ -10503,32 +10503,32 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
!11 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK17-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK17-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK17-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !11 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK17-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group 
!11 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -10758,25 +10758,25 @@ int bar(int n){ // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK17-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK17-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store 
void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK17-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK17-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -10788,16 +10788,16 @@ int bar(int n){ // CHECK17: omp_offload.failed.i: // CHECK17-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK17-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK17-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK17-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK17-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK17-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[LIN_CASTED_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK17-NEXT: [[CONV5_I:%.*]] = bitcast i64* [[A_CASTED_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !23 -// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !24 +// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !24 // CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i64 [[TMP28]], i64 [[TMP30]], i64 [[TMP32]]) #[[ATTR3]] // CHECK17-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK17: .omp_outlined..3.exit: @@ -12152,32 +12152,32 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // 
CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK18-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !11 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK18-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK18-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !11 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK18-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], 
align 8, !llvm.access.group !12 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !12 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -12407,25 +12407,25 @@ int bar(int n){ // CHECK18-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK18-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK18-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK18-NEXT: store i32* 
[[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK18-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK18-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK18-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -12437,16 +12437,16 @@ int bar(int n){ // CHECK18: omp_offload.failed.i: // CHECK18-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK18-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK18-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK18-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK18-NEXT: [[TMP28:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK18-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[LIN_CASTED_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i32 [[TMP29]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[LIN_CASTED_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK18-NEXT: [[CONV5_I:%.*]] = bitcast i64* [[A_CASTED_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !23 -// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV5_I]], align 4, !noalias !24 +// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[A_CASTED_I]], align 8, !noalias !24 // CHECK18-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i64 [[TMP28]], i64 [[TMP30]], i64 [[TMP32]]) #[[ATTR3]] // CHECK18-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK18: .omp_outlined..3.exit: @@ -13786,32 +13786,32 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // 
CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK19-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK19-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK19-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK19-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[A_ADDR]], align 4, !llvm.access.group !13 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -14035,25 +14035,25 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK19-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK19-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK19-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -14065,14 +14065,14 @@ int bar(int n){ // CHECK19: omp_offload.failed.i: // CHECK19-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK19-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK19-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK19-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !25 // CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i32 [[TMP28]], i32 [[TMP30]], i32 [[TMP32]]) #[[ATTR3]] // CHECK19-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK19: .omp_outlined..3.exit: @@ -15391,32 +15391,32 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK20-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK20-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !12 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK20-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK20-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK20-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !12 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !13 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK20-NEXT: store i32 
[[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !13 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -15640,25 +15640,25 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK20-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i8* [[TMP10]], i8** 
[[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK20-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK20-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK20-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -15670,14 +15670,14 @@ int bar(int n){ // CHECK20: omp_offload.failed.i: // CHECK20-NEXT: [[TMP27:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK20-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK20-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i16 [[TMP27]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP29:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK20-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i32 [[TMP29]], i32* [[LIN_CASTED_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[LIN_CASTED_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK20-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i32 [[TMP31]], i32* [[A_CASTED_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[A_CASTED_I]], align 4, !noalias !25 // CHECK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l138(i32 [[TMP28]], i32 [[TMP30]], i32 [[TMP32]]) #[[ATTR3]] // CHECK20-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK20: .omp_outlined..3.exit: @@ -19747,4 +19747,3 @@ int bar(int n){ // CHECK28-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK28-NEXT: ret void // -// diff --git 
a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp index b6db897c69460..f0aca4928a72d 100644 --- a/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_reduction_task_codegen.cpp @@ -465,26 +465,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -518,8 +518,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -1002,26 +1002,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1055,8 +1055,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1108,4 +1108,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp index 43a695c1e9065..124d1d8b3035c 100644 --- a/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_for_simd_codegen.cpp @@ -632,23 +632,23 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -684,17 +684,17 @@ int bar(int n){ // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 
8, !noalias !22 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK1-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK1-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK1-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -767,32 +767,32 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: 
[[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK1-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !23 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -905,44 +905,44 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] 
= load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK1-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK1-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK1-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK1-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK1-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK1-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, 
!llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1042,32 +1042,32 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK1-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK1-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: 
store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK1-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK1-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1203,60 +1203,60 @@ int bar(int n){ // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !35 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK1-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK1-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* 
[[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK1-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, 
!llvm.access.group !35 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK1-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1658,37 +1658,37 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, 
!llvm.access.group !38 +// CHECK1-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK1-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK1-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK1-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK1-NEXT: [[ARRAYIDX7:%.*]] = getelementptr 
inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1832,35 +1832,35 @@ int bar(int n){ // CHECK1-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK1-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK1-NEXT: store i64 
[[ADD]], i64* [[I]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK1-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK1-NEXT: store i64 [[ADD8]], i64* 
[[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2221,23 +2221,23 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2273,17 +2273,17 @@ int bar(int n){ // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !25 +// CHECK2-NEXT: store i8* null, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK2-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK2-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK2-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -2356,32 +2356,32 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK2-NEXT: store i32 [[SUB]], i32* 
[[I]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK2-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !23 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // 
CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2494,44 +2494,44 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK2-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK2-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK2-NEXT: store i32 
[[CONV9]], i32* [[LIN4]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK2-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK2-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK2-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK2-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2631,32 +2631,32 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK2-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, 
!llvm.access.group !32 // CHECK2-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK2-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2792,60 +2792,60 @@ int bar(int n){ // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // 
CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK2-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK2-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK2-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK2-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK2-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK2-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK2-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x 
[10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK2-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK2-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK2-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK2-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK2-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK2-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[CONV23:%.*]] 
= sext i8 [[TMP26]] to i32 // CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK2-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK2-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK2-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK2-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -3247,37 +3247,37 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[MUL:%.*]] = 
mul i64 [[TMP11]], 400 // CHECK2-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK2-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK2-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK2-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK2-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK2-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, 
!llvm.access.group !38 // CHECK2-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK2-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3421,35 +3421,35 @@ int bar(int n){ // CHECK2-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK2-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], 
align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK2-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK2-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3800,23 +3800,23 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3852,17 +3852,17 @@ int bar(int n){ // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // 
CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK3-NEXT: [[TMP11:%.*]] = call i32 
@__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK3-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK3-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -3930,32 +3930,32 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
!27 // CHECK3-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK3-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !24 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -4062,44 +4062,44 @@ int bar(int n){ // CHECK3-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], 
align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK3-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK3-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK3-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK3-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK3-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK3-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], 
align 8 +// CHECK3-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK3-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK3-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK3-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK3-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4196,32 +4196,32 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK3-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK3-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK3-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4351,60 +4351,60 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK3-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK3-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: 
store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK3-NEXT: store double 
[[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -4800,37 +4800,37 @@ int bar(int n){ // CHECK3-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK3-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK3-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK3-NEXT: 
[[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK3-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK3-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK3-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK3-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4968,35 +4968,35 @@ int 
bar(int n){ // CHECK3-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK3-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !42 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK3-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x 
i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5347,23 +5347,23 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5399,17 +5399,17 @@ int bar(int n){ // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !23 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], 
align 4, !noalias !23 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !23 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !23 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK4-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK4-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK4-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -5477,32 +5477,32 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] 
// CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK4-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK4-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !24 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[ADD3:%.*]] = add 
nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -5609,44 +5609,44 @@ int bar(int n){ // CHECK4-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK4-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK4-NEXT: [[SUB:%.*]] 
= sub i64 2000, [[MUL]] -// CHECK4-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK4-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK4-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK4-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK4-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK4-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK4-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK4-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK4-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK4-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK4-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[CONV11]], i32* [[A3]], 
align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK4-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK4-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5743,32 +5743,32 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK4-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK4-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK4-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // 
CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5898,60 +5898,60 @@ int bar(int n){ // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK4-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK4-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK4-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // 
CHECK4-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK4-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK4-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK4-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK4-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK4-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK4-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK4-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[ADD19:%.*]] = fadd double 
[[TMP24]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK4-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK4-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK4-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK4-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK4-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK4-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK4-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK4-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: 
omp.dispatch.inc: @@ -6347,37 +6347,37 @@ int bar(int n){ // CHECK4-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK4-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK4-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK4-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK4-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK4-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK4-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 
1.000000e+00 -// CHECK4-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK4-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK4-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK4-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK4-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6515,35 +6515,35 @@ int bar(int n){ // CHECK4-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK4-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !42 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // 
CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK4-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: 
omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !42 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6904,23 +6904,23 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -6956,17 +6956,17 @@ int bar(int n){ // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) // CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// 
CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK5-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK5-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK5-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -7039,32 +7039,32 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], 
label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK5-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK5-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !23 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 
// CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -7177,44 +7177,44 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 
+// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK5-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK5-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK5-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK5-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK5-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK5-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK5-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK5-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK5-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -7314,32 +7314,32 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK5-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK5-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK5-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK5-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -7475,60 +7475,60 @@ int bar(int n){ // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !35 // CHECK5-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK5-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK5-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK5-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK5-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK5-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK5-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK5-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK5-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[CONV13:%.*]] = fpext float 
[[TMP21]] to double // CHECK5-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK5-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK5-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK5-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK5-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK5-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK5-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK5-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK5-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK5-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK5-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK5-NEXT: store i64 
[[ADD22]], i64* [[X]], align 8 +// CHECK5-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK5-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK5-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK5-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK5-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK5-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK5-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -7988,37 +7988,37 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group 
!38 // CHECK5-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK5-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK5-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !29 +// CHECK5-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK5-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !29 +// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !29 +// CHECK5-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK5-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK5-NEXT: [[ARRAYIDX8:%.*]] = getelementptr 
inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK5-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK5-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK5-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: @@ -8070,7 +8070,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 // CHECK5-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK5: omp.inner.for.end31: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -8218,35 +8218,35 @@ int bar(int n){ // CHECK5-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK5-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !44 // 
CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK5-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // 
CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK5-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK5-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK5-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -8607,23 +8607,23 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// 
CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -8659,17 +8659,17 @@ int bar(int n){ // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) // CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK6-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !25 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !25 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !25 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !25 +// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !25 // CHECK6-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK6-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK6-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -8742,32 +8742,32 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP5]], 
[[TMP6]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK6-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !23 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP9]], 3 // CHECK6-NEXT: [[CONV3:%.*]] = sext i32 [[MUL2]] to i64 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP8]], [[CONV3]] -// CHECK6-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !23 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK6-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !23 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -8880,44 +8880,44 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK6-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 
8 -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK6-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK6-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK6-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK6-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK6-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK6-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK6-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK6-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK6-NEXT: 
br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK6-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK6-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9017,32 +9017,32 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK6-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK6-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i16 [[CONV3]], i16* [[IT]], align 
2, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK6-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK6-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9178,60 +9178,60 @@ int bar(int n){ // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK6-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK6-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK6-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK6-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK6-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !35 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK6-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK6-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK6-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK6-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, 
!llvm.access.group !35 // CHECK6-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK6-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK6-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK6-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK6-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK6-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK6-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK6-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK6-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK6-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK6-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK6-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK6-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK6-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK6-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 // CHECK6-NEXT: 
[[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK6-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK6-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK6-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK6-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK6-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK6-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK6-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK6-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK6-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -9691,37 +9691,37 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK6-NEXT: 
[[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !38 // CHECK6-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK6-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK6-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !38 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !29 +// CHECK6-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK6-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !29 +// CHECK6-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK6-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK6-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !29 +// CHECK6-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !39, !llvm.access.group !38 // CHECK6-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK6-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* 
[[TMP3]], i64 [[TMP15]] // CHECK6-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK6-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK6-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 // CHECK6-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK6-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !38 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_IF_END:%.*]] // CHECK6: omp_if.else: @@ -9773,7 +9773,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 // CHECK6-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK6: omp.inner.for.end31: // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: @@ -9921,35 +9921,35 @@ int bar(int n){ // CHECK6-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK6-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP7:%.*]] = 
load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK6-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK6-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK6-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK6-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -10300,23 +10300,23 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -10352,17 +10352,17 @@ int bar(int n){ // CHECK7-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK7-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK7-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 -// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, 
!noalias !23 +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK7-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK7-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK7-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK7-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -10430,32 +10430,32 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // 
CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK7-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK7-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK7-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !24 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -10562,44 +10562,44 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK7-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK7-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// 
CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK7-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK7-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK7-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK7-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK7-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK7-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK7-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK7-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK7-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK7-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK7-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 [[CONV14]], 
i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK7-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK7-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -10696,32 +10696,32 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK7-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK7-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[A_ADDR]], align 4 +// CHECK7-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK7-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -10851,60 +10851,60 @@ int bar(int n){ // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK7-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK7-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK7-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK7-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK7-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK7-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK7-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK7-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// 
CHECK7-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK7-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK7-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK7-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK7-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK7-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK7-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK7-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK7-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK7-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK7-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK7-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK7-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK7-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK7-NEXT: [[TMP25:%.*]] = 
load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK7-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK7-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK7-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK7-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK7-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK7-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK7-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK7-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -11358,37 +11358,37 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP10:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 // CHECK7-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK7-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK7-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK7-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !30 +// CHECK7-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !40, !llvm.access.group !39 // CHECK7-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !30 +// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 // CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !30 +// CHECK7-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 // CHECK7-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // 
CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK7-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK7-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK7-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK7-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: @@ -11440,7 +11440,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 // CHECK7-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK7: omp.inner.for.end30: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -11582,35 +11582,35 @@ int bar(int n){ // CHECK7-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK7-NEXT: [[TMP6:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !45 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK7-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 
[[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK7-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 // CHECK7-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK7-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK7-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -11961,23 +11961,23 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -12013,17 +12013,17 @@ int bar(int n){ // CHECK8-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 4 // CHECK8-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK8-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !23 -// CHECK8-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !23 -// CHECK8-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !23 -// CHECK8-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !23 -// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 -// 
CHECK8-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !23 +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 +// CHECK8-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !26 // CHECK8-NEXT: [[TMP11:%.*]] = call i32 @__tgt_target_teams_nowait_mapper(%struct.ident_t* @[[GLOB2]], i64 -1, i8* @.{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l96.region_id, i32 0, i8** null, i8** null, i64* null, i64* null, i8** null, i8** null, i32 1, i32 0) #[[ATTR3]] // CHECK8-NEXT: [[TMP12:%.*]] = icmp ne i32 [[TMP11]], 0 // CHECK8-NEXT: br i1 [[TMP12]], label [[OMP_OFFLOAD_FAILED_I:%.*]], label [[DOTOMP_OUTLINED__1_EXIT:%.*]] @@ -12091,32 +12091,32 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// 
CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[SUB:%.*]] = sub nsw i32 10, [[MUL]] -// CHECK8-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK8-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !24 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: store i32 [[SUB]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTLINEAR_START]], align 8, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[MUL2:%.*]] = mul nsw i32 [[TMP10]], 3 // CHECK8-NEXT: [[CONV:%.*]] = sext i32 [[MUL2]] to i64 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i64 [[TMP9]], [[CONV]] -// CHECK8-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !24 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: store i64 [[ADD]], i64* [[K1]], align 8, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !27 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -12223,44 +12223,44 @@ int bar(int n){ // CHECK8-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK8-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK8-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK8-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK8-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK8-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK8-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK8-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTLINEAR_START]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK8-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK8-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK8-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK8-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK8-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK8-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK8-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK8-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK8-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !30 // CHECK8-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK8-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK8-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK8-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 
[[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK8-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK8-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK8-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -12357,32 +12357,32 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK8-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to 
i16 -// CHECK8-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK8-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -12512,60 +12512,60 @@ int bar(int n){ // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK8-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK8-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK8-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK8-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK8-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK8-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK8-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK8-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, 
float* [[TMP2]], i32 3 -// CHECK8-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK8-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK8-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK8-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK8-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK8-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK8-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK8-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK8-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK8-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK8-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK8-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK8-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK8-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK8-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK8-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK8-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK8-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK8-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK8-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 
0, i32 0 -// CHECK8-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK8-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK8-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK8-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK8-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK8-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK8-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK8-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK8-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK8-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK8-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -13019,37 +13019,37 @@ int bar(int n){ // CHECK8-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// 
CHECK8-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK8-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK8-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !39 // CHECK8-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK8-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK8-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK8-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK8-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK8-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !39 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK8-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !30 +// CHECK8-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !40, !llvm.access.group !39 // CHECK8-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !30 +// CHECK8-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 // CHECK8-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK8-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !30 +// CHECK8-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !40, !llvm.access.group !39 // 
CHECK8-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK8-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK8-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK8-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK8-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 // CHECK8-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK8-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK8-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !39 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] // CHECK8: omp_if.else: @@ -13101,7 +13101,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK8-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 // CHECK8-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK8: omp.inner.for.end30: // CHECK8-NEXT: br label [[OMP_IF_END]] // CHECK8: omp_if.end: @@ -13243,35 +13243,35 @@ int bar(int n){ // CHECK8-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// 
CHECK8-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK8-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !45 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK8-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw 
i32 [[TMP11]], 1 -// CHECK8-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK8-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 // CHECK8-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK8-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK8-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !45 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -17432,23 +17432,23 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK17-NEXT: store i32 
[[ADD]], i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -17551,44 +17551,44 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK17-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK17-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK17-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK17-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK17-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK17-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK17-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP16:%.*]] = 
load i16, i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK17-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK17-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK17-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -17694,32 +17694,32 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK17-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 // CHECK17-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK17-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK17-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: 
br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -17855,60 +17855,60 @@ int bar(int n){ // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK17-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK17-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 
1.000000e+00 // CHECK17-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK17-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK17-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK17-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK17-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 
// CHECK17-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 // CHECK17-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK17-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: 
omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -18069,37 +18069,37 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK17-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK17-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load 
double, double* [[A5]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], double* [[A5]], align 8 +// CHECK17-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK17-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK17-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -18185,35 +18185,35 @@ int bar(int n){ // CHECK17-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK17-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: 
[[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, 
!llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK17-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -18271,23 +18271,23 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -18390,44 +18390,44 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 +// 
CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK18-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK18-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK18-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK18-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK18-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK18-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK18-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK18-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = 
add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK18-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK18-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK18-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -18533,32 +18533,32 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // 
CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK18-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 // CHECK18-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK18-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK18-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -18694,60 +18694,60 @@ int bar(int n){ // CHECK18: omp.dispatch.body: // CHECK18-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK18-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK18-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK18-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK18-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// 
CHECK18-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK18-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK18-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK18-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK18-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK18-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK18-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK18-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK18-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK18-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK18-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK18-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// 
CHECK18-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK18-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK18-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK18-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK18-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK18-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 // CHECK18-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK18-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK18-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK18-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK18-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK18-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -18908,37 +18908,37 @@ int bar(int n){ // 
CHECK18-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK18-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK18-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: store double [[ADD]], double* [[A]], align 8 +// CHECK18-NEXT: store double [[ADD]], double* [[A]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8 +// CHECK18-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK18-NEXT: store 
double [[INC]], double* [[A5]], align 8 +// CHECK18-NEXT: store double [[INC]], double* [[A5]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK18-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK18-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !26 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK18-NEXT: [[ADD8:%.*]] = add i64 [[TMP15]], 1 -// CHECK18-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK18-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -19024,35 +19024,35 @@ int bar(int n){ // CHECK18-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK18-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // 
CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !29 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK18-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK18-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !29 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -19110,23 +19110,23 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: 
omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -19223,44 +19223,44 @@ int bar(int n){ // CHECK19-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 // CHECK19-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 // CHECK19-NEXT: 
[[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK19-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK19-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK19-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK19-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK19-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK19-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK19-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK19-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK19-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 
[[CONV14]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK19-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK19-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK19-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -19363,32 +19363,32 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK19-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// 
CHECK19-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK19-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -19518,60 +19518,60 @@ int bar(int n){ // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK19-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK19-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK19-NEXT: 
[[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK19-NEXT: 
[[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -19726,37 +19726,37 @@ int bar(int n){ // CHECK19-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // 
CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 // CHECK19-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK19-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK19-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK19-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 // CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK19-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK19-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !27 // CHECK19-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !27 // CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK19-NEXT: store double [[INC]], double* [[A4]], align 4, 
!llvm.access.group !27 // CHECK19-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK19-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !27 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK19-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -19839,35 +19839,35 @@ int bar(int n){ // CHECK19-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK19-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK19-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK19-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], 
align 8 +// CHECK19-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK19-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -19925,23 +19925,23 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -20038,44 +20038,44 @@ int bar(int n){ // CHECK20-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK20-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 // CHECK20-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK20-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK20-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK20-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK20-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK20-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK20-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: 
[[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK20-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK20-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK20-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK20-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK20-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK20-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK20-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK20-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK20-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK20-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK20-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 // 
CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK20-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK20-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK20-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -20178,32 +20178,32 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK20-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK20-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// 
CHECK20-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK20-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -20333,60 +20333,60 @@ int bar(int n){ // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK20-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK20-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK20-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK20-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK20-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK20-NEXT: [[TMP21:%.*]] = load float, 
float* [[ARRAYIDX10]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK20-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK20-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK20-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK20-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK20-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK20-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK20-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK20-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK20-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK20-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK20-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK20-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK20-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK20-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK20-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP25:%.*]] = load 
i64, i64* [[X]], align 4 +// CHECK20-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK20-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK20-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK20-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK20-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK20-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK20-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK20-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK20-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK20-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK20-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -20541,37 +20541,37 @@ int bar(int n){ // CHECK20-NEXT: store i64 [[TMP8]], i64* [[DOTOMP_IV]], align 8 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: [[TMP10:%.*]] 
= load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK20-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 // CHECK20-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK20-NEXT: [[MUL:%.*]] = mul i64 [[TMP11]], 400 // CHECK20-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK20-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK20-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 // CHECK20-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK20-NEXT: [[ADD:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK20-NEXT: store double [[ADD]], double* [[A]], align 4 +// CHECK20-NEXT: store double [[ADD]], double* [[A]], align 4, !llvm.access.group !27 // CHECK20-NEXT: [[A4:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load double, double* [[A4]], align 4, !llvm.access.group !27 // CHECK20-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK20-NEXT: store double [[INC]], double* [[A4]], align 4 +// CHECK20-NEXT: store double [[INC]], double* [[A4]], align 4, !llvm.access.group !27 // CHECK20-NEXT: [[CONV5:%.*]] = fptosi double [[INC]] to i16 // CHECK20-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // 
CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK20-NEXT: [[ARRAYIDX6:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[ARRAYIDX6]], align 2, !llvm.access.group !27 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK20-NEXT: [[ADD7:%.*]] = add i64 [[TMP15]], 1 -// CHECK20-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK20-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -20654,35 +20654,35 @@ int bar(int n){ // CHECK20-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK20-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK20-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !30 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP8:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK20-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !30 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !30 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !30 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK20-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw 
i64 [[TMP12]], 1 -// CHECK20-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK20-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !30 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -20740,23 +20740,23 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], 
i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -20859,44 +20859,44 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK21-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 // CHECK21-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 
+// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK21-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK21-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK21-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK21-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 // CHECK21-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK21-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK21-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK21-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 +// CHECK21-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP16]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: 
omp.inner.for.inc: -// CHECK21-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK21-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK21-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK21-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -21002,32 +21002,32 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK21-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK21-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, 
!llvm.access.group !20 // CHECK21-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK21-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK21-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -21163,60 +21163,60 @@ int bar(int n){ // CHECK21: omp.dispatch.body: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !23 // CHECK21-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK21-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK21-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK21-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK21-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK21-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK21-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK21-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK21-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK21-NEXT: 
[[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK21-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK21-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK21-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK21-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK21-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK21-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK21-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK21-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK21-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK21-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK21-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK21-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[ADD22:%.*]] = 
add nsw i64 [[TMP25]], 1 -// CHECK21-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK21-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK21-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 // CHECK21-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK21-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK21-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK21-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK21-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK21: omp.dispatch.inc: @@ -21410,37 +21410,37 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// 
CHECK21-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 // CHECK21-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK21-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK21-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !16 +// CHECK21-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !27, !llvm.access.group !26 // CHECK21-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !16 +// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !27, !llvm.access.group !26 // CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !16 +// CHECK21-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !27, !llvm.access.group !26 // CHECK21-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK21-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK21-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK21-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !26 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK21-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK21-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK21-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: @@ -21492,7 +21492,7 @@ int bar(int n){ // CHECK21-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 // CHECK21-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK21: omp.inner.for.end31: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -21582,35 +21582,35 @@ int bar(int n){ // CHECK21-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK21-NEXT: [[TMP6:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK21-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// 
CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK21-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 // CHECK21-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK21-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK21-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -21668,23 +21668,23 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK22-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], 
align 4, !llvm.access.group !11 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -21787,44 +21787,44 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK22-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[CMP6:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK22-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK22-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK22-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK22-NEXT: store i64 
[[SUB]], i64* [[IT]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !17 // CHECK22-NEXT: [[CONV7:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[MUL8:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK22-NEXT: [[ADD:%.*]] = add i64 [[CONV7]], [[MUL8]] // CHECK22-NEXT: [[CONV9:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK22-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4 +// CHECK22-NEXT: store i32 [[CONV9]], i32* [[LIN4]], align 4, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START3]], align 4, !llvm.access.group !17 // CHECK22-NEXT: [[CONV10:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK22-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK22-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[MUL11:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK22-NEXT: [[ADD12:%.*]] = add i64 [[CONV10]], [[MUL11]] // CHECK22-NEXT: [[CONV13:%.*]] = trunc i64 [[ADD12]] to i32 -// CHECK22-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4 -// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[CONV13]], i32* [[A5]], align 4, !llvm.access.group !17 +// CHECK22-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[CONV14:%.*]] = 
sext i16 [[TMP16]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV]], align 8, !llvm.access.group !17 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 // CHECK22-NEXT: [[ADD17:%.*]] = add i64 [[TMP17]], 1 -// CHECK22-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK22-NEXT: store i64 [[ADD17]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !17 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -21930,32 +21930,32 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK22-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // 
CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK22-NEXT: [[CONV3:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK22-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i16 [[CONV3]], i16* [[IT]], align 2, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !20 // CHECK22-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[ADD4]], i32* [[CONV]], align 8, !llvm.access.group !20 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK22-NEXT: [[CONV5:%.*]] = sext i16 [[TMP9]] to i32 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[CONV5]], 1 // CHECK22-NEXT: [[CONV7:%.*]] = trunc i32 [[ADD6]] to i16 -// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i16 [[CONV7]], i16* [[CONV1]], align 8, !llvm.access.group !20 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK22-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK22-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -22091,60 +22091,60 @@ int 
bar(int n){ // CHECK22: omp.dispatch.body: // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK22-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK22-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK22-NEXT: [[CONV8:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK22-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1 -// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i8 [[CONV8]], i8* [[IT]], align 1, !llvm.access.group !23 +// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[CONV]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK22-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK22-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK22-NEXT: 
store float [[CONV11]], float* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK22-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4 +// CHECK22-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK22-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK22-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK22-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4 +// CHECK22-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK22-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK22-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK22-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK22-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK22-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK22-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK22-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8 +// CHECK22-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK22-NEXT: store double 
[[ADD21]], double* [[ARRAYIDX20]], align 8 +// CHECK22-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK22-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8 +// CHECK22-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK22-NEXT: store i64 [[ADD22]], i64* [[X]], align 8 +// CHECK22-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK22-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK22-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !23 // CHECK22-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK22-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK22-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK22-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8 +// CHECK22-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !23 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK22-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK22-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK22: omp.dispatch.inc: @@ 
-22338,37 +22338,37 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK22-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK22-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !26 // CHECK22-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK22-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK22-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK22-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK22-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !26 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !26 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK22-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !16 +// CHECK22-NEXT: store double [[ADD]], double* [[A]], align 8, !nontemporal !27, !llvm.access.group !26 // CHECK22-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK22-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !16 +// CHECK22-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 8, !nontemporal !27, 
!llvm.access.group !26 // CHECK22-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK22-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !16 +// CHECK22-NEXT: store double [[INC]], double* [[A6]], align 8, !nontemporal !27, !llvm.access.group !26 // CHECK22-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK22-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK22-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK22-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK22-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !26 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 // CHECK22-NEXT: [[ADD9:%.*]] = add i64 [[TMP16]], 1 -// CHECK22-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK22-NEXT: store i64 [[ADD9]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !26 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_IF_END:%.*]] // CHECK22: omp_if.else: @@ -22420,7 +22420,7 @@ int bar(int n){ // CHECK22-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK22-NEXT: [[ADD30:%.*]] = add i64 [[TMP28]], 1 // CHECK22-NEXT: store i64 [[ADD30]], i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND15]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK22: omp.inner.for.end31: // CHECK22-NEXT: br label 
[[OMP_IF_END]] // CHECK22: omp_if.end: @@ -22510,35 +22510,35 @@ int bar(int n){ // CHECK22-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK22-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !32 // CHECK22-NEXT: [[CMP2:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK22-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, 
!llvm.access.group !32 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK22-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK22-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 // CHECK22-NEXT: [[ADD8:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK22-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK22-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !32 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -22596,23 +22596,23 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -22709,44 +22709,44 @@ int bar(int n){ // CHECK23-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 // CHECK23-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK23-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: 
omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 // CHECK23-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK23-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK23-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK23-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK23-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK23-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 // CHECK23-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK23-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK23-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK23-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // 
CHECK23-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK23-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK23-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK23-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK23-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK23-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK23-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -22849,32 +22849,32 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// 
CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK23-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK23-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK23-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 
-// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -23004,60 +23004,60 @@ int bar(int n){ // CHECK23: omp.dispatch.body: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK23-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK23-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK23-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK23-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK23-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK23-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK23-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK23-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK23-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK23-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK23-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK23-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK23-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK23-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK23-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK23-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK23-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK23-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK23-NEXT: 
[[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK23-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK23-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK23-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK23-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK23-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK23-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK23-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK23-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK23-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK23-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK23-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK23-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4 +// CHECK23-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK23-NEXT: [[ADD24:%.*]] = 
add nsw i32 [[TMP27]], 1 -// CHECK23-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK23: omp.dispatch.inc: @@ -23245,37 +23245,37 @@ int bar(int n){ // CHECK23-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 // CHECK23-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK23-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK23-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK23-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 // CHECK23-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK23-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// 
CHECK23-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !17 +// CHECK23-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK23-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !17 +// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !17 +// CHECK23-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK23-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK23-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK23-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !27 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK23-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK23-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK23-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: 
omp_if.else: @@ -23327,7 +23327,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 // CHECK23-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK23: omp.inner.for.end30: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -23414,35 +23414,35 @@ int bar(int n){ // CHECK23-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK23-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !33 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// CHECK23-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// 
CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK23-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 // CHECK23-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK23-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK23-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -23500,23 +23500,23 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// 
CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 5 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 3, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK24-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -23613,44 +23613,44 @@ int bar(int n){ // CHECK24-NEXT: store i64 [[TMP6]], i64* [[DOTOMP_IV]], align 8 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP7:%.*]] = load i64, i64* 
[[DOTOMP_IV]], align 8 -// CHECK24-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK24-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !18 // CHECK24-NEXT: [[CMP4:%.*]] = icmp ule i64 [[TMP7]], [[TMP8]] // CHECK24-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP9:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK24-NEXT: [[MUL:%.*]] = mul i64 [[TMP9]], 400 // CHECK24-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK24-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4 +// CHECK24-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTLINEAR_START]], align 4, !llvm.access.group !18 // CHECK24-NEXT: [[CONV5:%.*]] = sext i32 [[TMP10]] to i64 -// CHECK24-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK24-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK24-NEXT: [[MUL6:%.*]] = mul i64 [[TMP11]], [[TMP12]] // CHECK24-NEXT: [[ADD:%.*]] = add i64 [[CONV5]], [[MUL6]] // CHECK24-NEXT: [[CONV7:%.*]] = trunc i64 [[ADD]] to i32 -// CHECK24-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4 -// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4 +// CHECK24-NEXT: store i32 [[CONV7]], i32* [[LIN2]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTLINEAR_START1]], align 4, !llvm.access.group !18 // 
CHECK24-NEXT: [[CONV8:%.*]] = sext i32 [[TMP13]] to i64 -// CHECK24-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8 +// CHECK24-NEXT: [[TMP14:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP15:%.*]] = load i64, i64* [[DOTLINEAR_STEP]], align 8, !llvm.access.group !18 // CHECK24-NEXT: [[MUL9:%.*]] = mul i64 [[TMP14]], [[TMP15]] // CHECK24-NEXT: [[ADD10:%.*]] = add i64 [[CONV8]], [[MUL9]] // CHECK24-NEXT: [[CONV11:%.*]] = trunc i64 [[ADD10]] to i32 -// CHECK24-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4 -// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[CONV11]], i32* [[A3]], align 4, !llvm.access.group !18 +// CHECK24-NEXT: [[TMP16:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP16]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !18 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP17:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 // CHECK24-NEXT: [[ADD15:%.*]] = add i64 [[TMP17]], 1 -// CHECK24-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK24-NEXT: store i64 [[ADD15]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !18 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -23753,32 +23753,32 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 4 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 6, [[MUL]] // CHECK24-NEXT: [[CONV2:%.*]] = trunc i32 [[ADD]] to i16 -// CHECK24-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2 -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i16 [[CONV2]], i16* [[IT]], align 2, !llvm.access.group !21 +// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK24-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD3]], i32* [[A_ADDR]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK24-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], 
align 4 +// CHECK24-NEXT: store i16 [[CONV6]], i16* [[CONV]], align 4, !llvm.access.group !21 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK24-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -23908,60 +23908,60 @@ int bar(int n){ // CHECK24: omp.dispatch.body: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK24-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK24-NEXT: [[SUB:%.*]] = sub nsw i32 122, [[MUL]] // CHECK24-NEXT: [[CONV:%.*]] = trunc i32 [[SUB]] to i8 -// CHECK24-NEXT: store i8 [[CONV]], i8* [[IT]], align 1 -// 
CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i8 [[CONV]], i8* [[IT]], align 1, !llvm.access.group !24 +// CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK24-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[A_ADDR]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[CONV7:%.*]] = fpext float [[TMP20]] to double // CHECK24-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK24-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK24-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK24-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK24-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK24-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK24-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK24-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK24-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK24-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK24-NEXT: 
[[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK24-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK24-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK24-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK24-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK24-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK24-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK24-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK24-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK24-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK24-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK24-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK24-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK24-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK24-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4 +// CHECK24-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK24-NEXT: store i64 [[ADD20]], i64* [[X]], align 4 +// CHECK24-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK24-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK24-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK24-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK24-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK24-NEXT: store i8 
[[CONV23]], i8* [[Y]], align 4 +// CHECK24-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !24 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK24-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK24-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK24: omp.dispatch.inc: @@ -24149,37 +24149,37 @@ int bar(int n){ // CHECK24-NEXT: store i64 [[TMP9]], i64* [[DOTOMP_IV]], align 8 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK24-NEXT: [[TMP10:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK24-NEXT: [[TMP11:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !27 // CHECK24-NEXT: [[CMP3:%.*]] = icmp ule i64 [[TMP10]], [[TMP11]] // CHECK24-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 // CHECK24-NEXT: [[MUL:%.*]] = mul i64 [[TMP12]], 400 // CHECK24-NEXT: [[SUB:%.*]] = sub i64 2000, [[MUL]] -// CHECK24-NEXT: store i64 [[SUB]], i64* [[IT]], align 8 -// 
CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK24-NEXT: store i64 [[SUB]], i64* [[IT]], align 8, !llvm.access.group !27 +// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !27 // CHECK24-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK24-NEXT: [[ADD:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK24-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK24-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !17 +// CHECK24-NEXT: store double [[ADD]], double* [[A]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK24-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK24-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !17 +// CHECK24-NEXT: [[TMP14:%.*]] = load double, double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK24-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK24-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !17 +// CHECK24-NEXT: store double [[INC]], double* [[A5]], align 4, !nontemporal !28, !llvm.access.group !27 // CHECK24-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK24-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK24-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK24-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK24-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !27 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP16:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, 
!llvm.access.group !27 // CHECK24-NEXT: [[ADD8:%.*]] = add i64 [[TMP16]], 1 -// CHECK24-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK24-NEXT: store i64 [[ADD8]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !27 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_IF_END:%.*]] // CHECK24: omp_if.else: @@ -24231,7 +24231,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP28:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 // CHECK24-NEXT: [[ADD29:%.*]] = add i64 [[TMP28]], 1 // CHECK24-NEXT: store i64 [[ADD29]], i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND14]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK24: omp.inner.for.end30: // CHECK24-NEXT: br label [[OMP_IF_END]] // CHECK24: omp_if.end: @@ -24318,35 +24318,35 @@ int bar(int n){ // CHECK24-NEXT: store i64 [[TMP5]], i64* [[DOTOMP_IV]], align 8 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8 +// CHECK24-NEXT: [[TMP6:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK24-NEXT: [[TMP7:%.*]] = load i64, i64* [[DOTOMP_UB]], align 8, !llvm.access.group !33 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i64 [[TMP6]], [[TMP7]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP8:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i64 [[TMP8]], 3 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i64 -10, [[MUL]] -// 
CHECK24-NEXT: store i64 [[ADD]], i64* [[I]], align 8 -// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i64 [[ADD]], i64* [[I]], align 8, !llvm.access.group !33 +// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK24-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8 +// CHECK24-NEXT: [[TMP12:%.*]] = load i64, i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 // CHECK24-NEXT: [[ADD7:%.*]] = add nsw i64 [[TMP12]], 1 -// CHECK24-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP21:![0-9]+]] +// CHECK24-NEXT: store i64 [[ADD7]], i64* [[DOTOMP_IV]], align 8, !llvm.access.group !33 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: diff --git a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp index f2a9f23b10db0..bdf2622c18f8a 100644 --- a/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_parallel_reduction_task_codegen.cpp @@ -411,26 +411,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -464,8 +464,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -894,26 +894,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -947,8 +947,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1000,4 +1000,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_codegen.cpp b/clang/test/OpenMP/target_teams_codegen.cpp index ec5635cbb136a..ebf47fe160d2b 100644 --- a/clang/test/OpenMP/target_teams_codegen.cpp +++ b/clang/test/OpenMP/target_teams_codegen.cpp @@ -767,25 +767,25 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -799,16 +799,16 @@ int bar(int n){ // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK1-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !24 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !24 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .omp_outlined..1.exit: @@ -2150,25 +2150,25 @@ int bar(int n){ // CHECK2-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -2182,16 +2182,16 @@ int bar(int n){ // CHECK2: omp_offload.failed.i: // CHECK2-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK2-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK2-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK2-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !24 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !24 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .omp_outlined..1.exit: @@ -3524,25 +3524,25 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -3556,14 +3556,14 @@ int bar(int n){ // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK3-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK3-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK3: .omp_outlined..1.exit: @@ -4866,25 +4866,25 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* 
[[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK4-NEXT: store 
i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], 
align 4, !noalias !25 +// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -4898,14 +4898,14 @@ int bar(int n){ // CHECK4: omp_offload.failed.i: // CHECK4-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK4-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK4-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 
4, !noalias !25 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK4: .omp_outlined..1.exit: @@ -8323,25 +8323,25 @@ int bar(int n){ // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK17-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK17-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK17-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK17-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -8355,16 +8355,16 @@ int bar(int n){ // CHECK17: omp_offload.failed.i: // CHECK17-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK17-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK17-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK17-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !24 // CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK17-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK17-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK17-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !24 +// CHECK17-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !24 // CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK17-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK17: .omp_outlined..1.exit: @@ -9706,25 +9706,25 @@ 
int bar(int n){ // CHECK18-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK18-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK18-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK18-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !24 +// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK18-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK18-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -9738,16 +9738,16 @@ int bar(int n){ // CHECK18: omp_offload.failed.i: // CHECK18-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK18-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK18-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 +// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK18-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !24 +// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !24 // CHECK18-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK18-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK18-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK18-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !24 +// CHECK18-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !24 // CHECK18-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK18-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK18: .omp_outlined..1.exit: @@ -11080,25 +11080,25 
@@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK19-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK19-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK19-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -11112,14 +11112,14 @@ int bar(int n){ // CHECK19: omp_offload.failed.i: // CHECK19-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK19-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK19-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 // CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK19-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK19-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 // CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK19-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK19: .omp_outlined..1.exit: @@ -12422,25 +12422,25 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds 
[[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK20-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK20-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK20-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK20-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -12454,14 +12454,14 @@ int bar(int n){ // CHECK20: omp_offload.failed.i: // CHECK20-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK20-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK20-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !25 +// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !25 // CHECK20-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK20-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK20-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK20-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 +// CHECK20-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !25 // CHECK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l101(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK20-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK20: .omp_outlined..1.exit: @@ -15413,4 +15413,3 @@ int bar(int n){ // CHECK28-NEXT: store i32 [[ADD4]], i32* [[ARRAYIDX]], align 4 // CHECK28-NEXT: 
ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_codegen.cpp index 33347f872b05f..2967cb82e66e2 100644 --- a/clang/test/OpenMP/target_teams_distribute_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_codegen.cpp @@ -781,25 +781,25 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !20 -// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !20 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 +// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -814,16 +814,16 @@ int bar(int n){ // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK1-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !20 -// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !20 +// CHECK1-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 +// CHECK1-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !20 -// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !20 +// CHECK1-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !21 +// CHECK1-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !21 // CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !20 -// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !20 +// CHECK1-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !21 +// CHECK1-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !21 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .omp_outlined..1.exit: @@ -1212,59 +1212,59 @@ int bar(int n){ // CHECK1: omp.dispatch.body: // 
CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK1-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK1-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK1-NEXT: [[CONV11:%.*]] = fptrunc 
double [[ADD10]] to float -// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK1-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK1-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK1-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK1-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP24:%.*]] = 
load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 // CHECK1-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK1-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK1-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !22 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK1-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -2501,25 +2501,25 @@ int bar(int n){ // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !20 -// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !20 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 +// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -2534,16 +2534,16 @@ int bar(int n){ // CHECK2: omp_offload.failed.i: // CHECK2-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK2-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !20 -// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !20 +// CHECK2-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 +// CHECK2-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !21 // CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK2-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !20 -// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !20 +// CHECK2-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !21 +// CHECK2-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !21 // CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK2-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !20 -// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !20 +// CHECK2-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !21 +// CHECK2-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !21 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .omp_outlined..1.exit: @@ -2932,59 +2932,59 @@ int bar(int n){ // CHECK2: omp.dispatch.body: 
// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK2-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK2-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK2-NEXT: [[CONV11:%.*]] = fptrunc 
double [[ADD10]] to float -// CHECK2-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK2-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK2-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK2-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK2-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK2-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK2-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP24:%.*]] = 
load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK2-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 // CHECK2-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK2-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK2-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK2-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !22 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK2-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK2-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -4213,25 +4213,25 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !21 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -4246,14 +4246,14 @@ int bar(int n){ // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK3-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK3-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 -// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !21 +// CHECK3-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !22 +// CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 +// CHECK3-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 // CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 -// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 +// CHECK3-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 +// CHECK3-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK3: .omp_outlined..1.exit: @@ -4630,59 +4630,59 @@ int bar(int n){ // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK3-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK3-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], 
align 4, !llvm.access.group !22 +// CHECK3-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK3-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK3-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK3-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK3-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK3-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK3-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK3-NEXT: 
[[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK3-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK3-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK3-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -5895,25 +5895,25 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !21 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -5928,14 +5928,14 @@ int bar(int n){ // CHECK4: omp_offload.failed.i: // CHECK4-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK4-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK4-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 -// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !21 +// CHECK4-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !22 +// CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !22 // CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 +// CHECK4-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 // CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 -// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 +// CHECK4-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 +// CHECK4-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK4: .omp_outlined..1.exit: @@ -6312,59 +6312,59 @@ int bar(int n){ // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// 
CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK4-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK4-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK4-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], 
align 4, !llvm.access.group !22 +// CHECK4-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK4-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK4-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK4-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK4-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK4-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK4-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK4-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK4-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK4-NEXT: 
[[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK4-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK4-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK4-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK4-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK4-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK4-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK4-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -7487,59 +7487,59 @@ int bar(int n){ // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, 
!llvm.access.group !11 +// CHECK9-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK9-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK9-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK9-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK9-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK9-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK9-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK9-NEXT: [[ARRAYIDX19:%.*]] = 
getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK9-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK9-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK9-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK9-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK9-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK9-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK9-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// 
CHECK9-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK9-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -8335,59 +8335,59 @@ int bar(int n){ // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK10-NEXT: 
[[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK10-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK10-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK10-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK10-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK10-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK10-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK10-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK10-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK10-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP22:%.*]] = load double, 
double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK10-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK10-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK10-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK10-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK10-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK10-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK10-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK10-NEXT: [[CONV25:%.*]] = trunc i32 
[[ADD24]] to i8 -// CHECK10-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK10-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK10-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -9172,59 +9172,59 @@ int bar(int n){ // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK11-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK11-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK11-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK11-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK11-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK11-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store float 
[[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK11-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK11-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK11-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK11-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK11-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK11-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK11-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK11-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK11-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK11-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK11-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, 
!llvm.access.group !14 // CHECK11-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK11-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK11-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK11-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !14 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK11-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK11-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -9997,59 +9997,59 @@ int bar(int n){ // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK12-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK12-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK12-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK12-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK12-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, 
!llvm.access.group !12 +// CHECK12-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK12-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK12-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK12-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK12-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK12-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK12-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK12-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK12-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK12-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK12-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK12-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK12-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK12-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK12-NEXT: [[X:%.*]] = 
getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK12-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK12-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK12-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK12-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !14 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK12-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK12-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -10931,25 +10931,25 @@ int 
bar(int n){ // CHECK17-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK17-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK17-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK17-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK17-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK17-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK17-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK17-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !20 -// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !20 +// CHECK17-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 +// CHECK17-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 // CHECK17-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK17-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK17-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -10964,16 +10964,16 @@ int bar(int n){ // CHECK17: omp_offload.failed.i: // CHECK17-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK17-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK17-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !20 -// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !20 +// CHECK17-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 +// CHECK17-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !21 // CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK17-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !20 -// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !20 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !21 +// CHECK17-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !21 // CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK17-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK17-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !20 -// CHECK17-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !20 +// CHECK17-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !21 +// CHECK17-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !21 // CHECK17-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK17-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK17: .omp_outlined..1.exit: @@ -11362,59 +11362,59 
@@ int bar(int n){ // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK17-NEXT: [[ADD10:%.*]] = fadd 
double [[CONV9]], 1.000000e+00 // CHECK17-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK17-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK17-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK17-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK17-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK17-NEXT: [[TMP24:%.*]] = load 
double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK17-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !22 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK17-NEXT: store i32 
[[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -12651,25 +12651,25 @@ int bar(int n){ // CHECK18-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK18-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK18-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) -// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !20 -// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !20 +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) +// CHECK18-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) +// CHECK18-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 +// CHECK18-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 // CHECK18-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK18-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !20 -// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !20 +// CHECK18-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !21 +// CHECK18-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !21 // CHECK18-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK18-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK18-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -12684,16 +12684,16 @@ int bar(int n){ // CHECK18: omp_offload.failed.i: // CHECK18-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK18-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK18-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !20 -// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !20 +// CHECK18-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 +// CHECK18-NEXT: [[TMP30:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !21 // CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK18-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !20 -// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !20 +// CHECK18-NEXT: store i32 [[TMP31]], i32* [[CONV4_I]], align 4, !noalias !21 +// CHECK18-NEXT: [[TMP32:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !21 // CHECK18-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK18-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK18-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !20 -// CHECK18-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !20 +// CHECK18-NEXT: store i32 [[TMP33]], i32* [[CONV6_I]], align 4, !noalias !21 +// CHECK18-NEXT: [[TMP34:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !21 // CHECK18-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i64 [[TMP30]], i64 [[TMP32]], i64 [[TMP34]]) #[[ATTR3]] // CHECK18-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK18: .omp_outlined..1.exit: @@ -13082,59 +13082,59 
@@ int bar(int n){ // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK18-NEXT: [[ADD10:%.*]] = fadd 
double [[CONV9]], 1.000000e+00 // CHECK18-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK18-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK18-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK18-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK18-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK18-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK18-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK18-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK18-NEXT: [[TMP24:%.*]] = load 
double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK18-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK18-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK18-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK18-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK18-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !22 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK18-NEXT: store i32 
[[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -14363,25 +14363,25 @@ int bar(int n){ // CHECK19-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK19-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK19-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK19-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK19-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK19-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !21 +// CHECK19-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 // CHECK19-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK19-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK19-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -14396,14 +14396,14 @@ int bar(int n){ // CHECK19: omp_offload.failed.i: // CHECK19-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK19-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK19-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !21 +// CHECK19-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !22 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !22 // CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 // CHECK19-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK19-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 -// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 +// CHECK19-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 +// CHECK19-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 // CHECK19-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK19-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK19: .omp_outlined..1.exit: @@ -14780,59 +14780,59 @@ int bar(int n){ // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK19-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK19-NEXT: [[CONV9:%.*]] = 
fptrunc double [[ADD8]] to float -// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK19-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK19-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK19-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK19-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK19-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK19-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// 
CHECK19-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK19-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK19-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK19-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK19-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// 
CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -16045,25 +16045,25 @@ int bar(int n){ // CHECK20-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK20-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !21 +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK20-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK20-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !22 // CHECK20-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK20-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !21 +// CHECK20-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !22 // CHECK20-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK20-NEXT: [[TMP21:%.*]] = 
getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK20-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -16078,14 +16078,14 @@ int bar(int n){ // CHECK20: omp_offload.failed.i: // CHECK20-NEXT: [[TMP29:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK20-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK20-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !21 -// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !21 +// CHECK20-NEXT: store i16 [[TMP29]], i16* [[CONV_I]], align 2, !noalias !22 +// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !22 // CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !21 +// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !22 // CHECK20-NEXT: [[TMP33:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK20-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 -// CHECK20-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !21 +// CHECK20-NEXT: store i32 [[TMP33]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 +// CHECK20-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !22 // CHECK20-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l103(i32 [[TMP30]], i32 [[TMP32]], i32 [[TMP34]]) #[[ATTR3]] // CHECK20-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK20: .omp_outlined..1.exit: @@ -16462,59 +16462,59 @@ int bar(int n){ // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK20-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK20-NEXT: [[CONV9:%.*]] = 
fptrunc double [[ADD8]] to float -// CHECK20-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK20-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK20-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK20-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK20-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK20-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !22 +// CHECK20-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !23 // CHECK20-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK20-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK20-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK20-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// 
CHECK20-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK20-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !22 +// CHECK20-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !23 // CHECK20-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK20-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK20-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK20-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK20-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK20-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !23 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK20-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// 
CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -17637,59 +17637,59 @@ int bar(int n){ // CHECK25: omp.dispatch.body: // CHECK25-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK25: omp.inner.for.cond: -// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK25-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK25-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK25: omp.inner.for.body: -// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK25-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK25-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK25-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK25-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK25-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK25-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK25-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK25-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK25-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK25-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK25-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK25-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK25-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// 
CHECK25-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK25-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK25-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK25-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK25-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK25-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK25-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK25-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !13 // CHECK25-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK25-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK25-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK25-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK25-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !13 // CHECK25-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK25: omp.body.continue: // CHECK25-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK25: omp.inner.for.inc: -// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK25-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK25-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK25-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK25-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK25-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK25: omp.inner.for.end: // CHECK25-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK25: omp.dispatch.inc: @@ -18485,59 +18485,59 @@ int bar(int n){ // CHECK26: omp.dispatch.body: // CHECK26-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK26: omp.inner.for.cond: -// CHECK26-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK26-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK26-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK26-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK26: omp.inner.for.body: -// CHECK26-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK26-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 -// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], 
align 8, !llvm.access.group !11 +// CHECK26-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK26-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: store i32 [[ADD8]], i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[CONV9:%.*]] = fpext float [[TMP20]] to double // CHECK26-NEXT: [[ADD10:%.*]] = fadd double [[CONV9]], 1.000000e+00 // CHECK26-NEXT: [[CONV11:%.*]] = fptrunc double [[ADD10]] to float -// CHECK26-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: store float [[CONV11]], float* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ARRAYIDX12:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK26-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[CONV13:%.*]] = fpext float [[TMP21]] to double // CHECK26-NEXT: [[ADD14:%.*]] = fadd double [[CONV13]], 1.000000e+00 // CHECK26-NEXT: [[CONV15:%.*]] = fptrunc double [[ADD14]] to float -// CHECK26-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: store float [[CONV15]], float* [[ARRAYIDX12]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK26-NEXT: [[ARRAYIDX17:%.*]] = getelementptr 
inbounds [10 x double], [10 x double]* [[ARRAYIDX16]], i64 0, i64 2 -// CHECK26-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[ADD18:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK26-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[TMP23:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK26-NEXT: [[ARRAYIDX19:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP23]] // CHECK26-NEXT: [[ARRAYIDX20:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX19]], i64 3 -// CHECK26-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[ADD21:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK26-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: store double [[ADD21]], double* [[ARRAYIDX20]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK26-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[ADD22:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK26-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: store i64 [[ADD22]], i64* [[X]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK26-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP26:%.*]] = load 
i8, i8* [[Y]], align 8, !llvm.access.group !13 // CHECK26-NEXT: [[CONV23:%.*]] = sext i8 [[TMP26]] to i32 // CHECK26-NEXT: [[ADD24:%.*]] = add nsw i32 [[CONV23]], 1 // CHECK26-NEXT: [[CONV25:%.*]] = trunc i32 [[ADD24]] to i8 -// CHECK26-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !11 +// CHECK26-NEXT: store i8 [[CONV25]], i8* [[Y]], align 8, !llvm.access.group !13 // CHECK26-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK26: omp.body.continue: // CHECK26-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK26: omp.inner.for.inc: -// CHECK26-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK26-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK26-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK26-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK26-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK26-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK26: omp.inner.for.end: // CHECK26-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK26: omp.dispatch.inc: @@ -19322,59 +19322,59 @@ int bar(int n){ // CHECK27: omp.dispatch.body: // CHECK27-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK27: omp.inner.for.cond: -// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK27-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK27-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK27: omp.inner.for.body: 
-// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK27-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK27-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK27-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK27-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK27-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK27-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK27-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK27-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK27-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 
1.000000e+00 // CHECK27-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK27-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK27-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK27-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK27-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK27-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK27-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK27-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK27-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK27-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK27-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK27-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// CHECK27-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK27-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK27-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK27-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group 
!14 // CHECK27-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK27-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK27-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK27-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK27-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK27-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !14 // CHECK27-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK27: omp.body.continue: // CHECK27-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK27: omp.inner.for.inc: -// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK27-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK27-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK27-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK27-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK27-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK27: omp.inner.for.end: // CHECK27-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK27: omp.dispatch.inc: @@ -20147,59 +20147,59 @@ int bar(int n){ // CHECK28: omp.dispatch.body: // CHECK28-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK28: omp.inner.for.cond: -// CHECK28-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK28-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK28-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK28-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK28: omp.inner.for.body: -// CHECK28-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK28-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 -// CHECK28-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK28-NEXT: [[TMP19:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK28-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store i32 [[ADD7]], i32* [[A_ADDR]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK28-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP20:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[CONV:%.*]] = fpext float [[TMP20]] to double // CHECK28-NEXT: [[ADD8:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK28-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK28-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store float [[CONV9]], float* 
[[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK28-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP21:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[CONV11:%.*]] = fpext float [[TMP21]] to double // CHECK28-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK28-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK28-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK28-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i32 0, i32 2 -// CHECK28-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP22:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK28-NEXT: [[ADD16:%.*]] = fadd double [[TMP22]], 1.000000e+00 -// CHECK28-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !12 +// CHECK28-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !14 // CHECK28-NEXT: [[TMP23:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK28-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP23]] // CHECK28-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i32 3 -// CHECK28-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP24:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK28-NEXT: [[ADD19:%.*]] = fadd double [[TMP24]], 1.000000e+00 -// 
CHECK28-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !12 +// CHECK28-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !14 // CHECK28-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK28-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP25:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP25]], 1 -// CHECK28-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store i64 [[ADD20]], i64* [[X]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK28-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP26:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[CONV21:%.*]] = sext i8 [[TMP26]] to i32 // CHECK28-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK28-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK28-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: store i8 [[CONV23]], i8* [[Y]], align 4, !llvm.access.group !14 // CHECK28-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK28: omp.body.continue: // CHECK28-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK28: omp.inner.for.inc: -// CHECK28-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK28-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK28-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP27]], 1 -// CHECK28-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK28-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// 
CHECK28-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK28: omp.inner.for.end: // CHECK28-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK28: omp.dispatch.inc: @@ -20593,4 +20593,3 @@ int bar(int n){ // CHECK28-NEXT: call void @__kmpc_for_static_fini(%struct.ident_t* @[[GLOB1]], i32 [[TMP2]]) // CHECK28-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp index ff6093290f173..8a278c56a29e0 100644 --- a/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_dist_schedule_codegen.cpp @@ -423,28 +423,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* 
[[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -770,28 +770,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: 
br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] 
// CHECK2: omp.dispatch.inc: @@ -1115,27 +1115,27 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -1459,27 +1459,27 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 
+// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -2077,27 +2077,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -2404,27 +2404,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -3022,27 +3022,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: store i32 
0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3349,27 +3349,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3951,26 +3951,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// 
CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br 
label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4275,26 +4275,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// 
CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4876,26 +4876,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group 
!12 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5200,26 +5200,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5243,4 +5243,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp 
b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp index f4a9b3fdb1db2..add8e3cd744af 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_order_codegen.cpp @@ -149,23 +149,23 @@ void gtid_test() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -308,23 +308,23 @@ void gtid_test() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !3 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !3 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -338,4 +338,3 @@ void gtid_test() { // CHECK2-NEXT: call void @__tgt_register_requires(i64 1) // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp index 699d60e0f7948..1978714a32482 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -811,26 +811,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void 
(%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -864,8 +864,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -1743,26 +1743,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1796,8 +1796,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1898,4 +1898,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp index c7913c1149e4c..3d9b9c871d95b 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_schedule_codegen.cpp @@ -923,28 +923,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !9 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1072,28 +1072,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// 
CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1838,28 +1838,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1987,28 +1987,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2731,27 +2731,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2875,27 +2875,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3618,27 +3618,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 
1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3762,27 +3762,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !13 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// 
CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4527,28 +4527,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4676,28 +4676,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 
// CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -5442,28 +5442,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !9 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5591,28 +5591,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6335,27 +6335,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP11:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6479,27 +6479,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !13 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -7222,27 +7222,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, 
i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7366,27 +7366,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // 
CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -8685,27 +8685,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -8923,27 +8923,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: store 
i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -9718,27 +9718,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -9886,27 +9886,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !24 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11205,27 +11205,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK14-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -11443,27 +11443,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, 
!llvm.access.group !17 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12238,27 +12238,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12406,27 +12406,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // 
CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13678,26 +13678,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -13901,26 +13901,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], 
[[TMP18]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -14666,26 +14666,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 
[[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: 
[[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -14824,26 +14824,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 
x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -16095,26 +16095,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP18:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -16318,26 +16318,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -17083,26 +17083,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: 
omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -17241,26 +17241,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18559,27 +18559,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK17-NEXT: 
[[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: 
omp.dispatch.inc: @@ -18797,27 +18797,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -19592,27 +19592,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -19760,27 +19760,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21079,27 +21079,27 @@ int main (int argc, char 
**argv) { // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP20:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -21317,27 +21317,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK18-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, 
!llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -22112,27 +22112,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: 
-// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -22280,27 +22280,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23552,26 +23552,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[TMP1]], i32 [[TMP19]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -23775,26 +23775,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -24540,26 +24540,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], 
align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -24698,26 +24698,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: 
omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -25969,26 +25969,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: 
[[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -26192,26 +26192,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[CMP5:%.*]] = 
icmp sle i32 [[TMP17]], [[TMP18]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -26957,26 +26957,26 @@ int main (int argc, char **argv) { // 
CHECK20-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 
// CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -27115,26 +27115,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
[10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -27149,4 +27149,3 @@ int main (int argc, char **argv) { // CHECK20-NEXT: call void @__tgt_register_requires(i64 1) // CHECK20-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp index 05ef8ba2b20a3..ae7383dce1500 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_if_codegen.cpp @@ -205,27 +205,27 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK1-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK1-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -289,24 +289,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // 
CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -366,26 +366,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK1-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK1-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -446,24 +446,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z9gtid_testv() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -587,23 +587,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -664,24 +664,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn4v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -741,26 +741,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group !30 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK1-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP31:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -821,24 +821,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn5v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -911,35 +911,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK1-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK1-NEXT: [[ADD:%.*]] = add 
nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1000,24 +1000,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn6v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK1-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1130,23 +1130,23 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1207,24 +1207,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn1v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1284,26 +1284,26 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, 
!llvm.access.group !48 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK1-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP49:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1364,24 +1364,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn2v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK1-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1454,35 +1454,35 @@ int main() { // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK1-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK1-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK1-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK1: omp_if.then: -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END:%.*]] // CHECK1: omp_if.else: -// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK1-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK1-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK1-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK1-NEXT: br label [[OMP_IF_END]] // CHECK1: omp_if.end: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK1-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP13]], [[TMP14]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1543,24 +1543,24 @@ int main() { // CHECK1-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: call void @_Z3fn3v() +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // 
CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1679,27 +1679,27 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !9 // CHECK2-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 
-// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK2-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1763,24 +1763,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], 
align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1840,26 +1840,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !18 +// CHECK2-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !18 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !18 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1920,24 +1920,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z9gtid_testv() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !21 // CHECK2-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2061,23 +2061,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !24 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2138,24 +2138,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 
// CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn4v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: call void @_Z3fn4v(), !llvm.access.group !27 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2215,26 +2215,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle 
i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !30 +// CHECK2-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !30 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !30 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2295,24 +2295,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn5v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: call void @_Z3fn5v(), !llvm.access.group !33 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK2-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2385,35 +2385,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !36 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !36 +// CHECK2-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !36 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !36 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 
// CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2474,24 +2474,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn6v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: call void @_Z3fn6v(), !llvm.access.group !39 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !39 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2604,23 +2604,23 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !42 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !42 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2681,24 +2681,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn1v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: call void @_Z3fn1v(), !llvm.access.group !45 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2758,26 +2758,26 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !48 +// CHECK2-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !48 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !48 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !48 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// 
CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2838,24 +2838,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn2v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: call void @_Z3fn2v(), !llvm.access.group !51 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !51 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2928,35 +2928,35 @@ int main() { // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK2-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK2-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK2-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK2-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK2: omp_if.then: -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END:%.*]] // CHECK2: omp_if.else: -// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK2-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK2-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK2-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK2-NEXT: br label [[OMP_IF_END]] // CHECK2: omp_if.end: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK2-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3017,24 +3017,24 @@ int main() { // CHECK2-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: call void @_Z3fn3v() +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
!57 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3153,27 +3153,27 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 // CHECK3-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] 
= load i64, i64* [[ARG_CASTED]], align 8 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK3-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3237,24 +3237,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !8 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3314,26 +3314,26 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK3-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// 
CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3394,24 +3394,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z9gtid_testv() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: call void 
@_Z9gtid_testv(), !llvm.access.group !22 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3535,23 +3535,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[TMP10:%.*]] = zext 
i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3612,24 +3612,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group 
!28 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn4v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3708,7 +3708,7 @@ int main() { // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: @@ -3786,7 +3786,7 @@ int main() { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3867,41 +3867,41 @@ int main() { // CHECK3: omp_if.then: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK3-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK3-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// 
CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK3-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then5: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK3-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK3: omp_if.else6: @@ -3941,7 +3941,7 @@ int main() { // CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK3-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end21: // CHECK3-NEXT: br label [[OMP_IF_END22]] // CHECK3: omp_if.end22: @@ -4011,24 +4011,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4067,7 +4067,7 @@ int main() { // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4139,24 +4139,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn6v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: @@ -4195,7 +4195,7 @@ int main() { // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK3-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK3: omp.inner.for.end18: // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: @@ -4312,23 +4312,23 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4389,24 +4389,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: 
[[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn1v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4485,7 +4485,7 @@ int main() { // CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4563,7 +4563,7 @@ int main() { // CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP53:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4636,35 +4636,35 @@ int main() { // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK3-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK3-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK3: omp_if.then: -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END:%.*]] // CHECK3: omp_if.else: -// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK3-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK3-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK3-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK3-NEXT: br label [[OMP_IF_END]] // CHECK3: omp_if.end: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// 
CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4725,24 +4725,24 @@ int main() { // CHECK3-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: call void @_Z3fn3v() +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK3-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !57 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4861,27 +4861,27 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !9 // CHECK4-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK4-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !9 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4945,24 +4945,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !14 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !8 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !10, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5022,26 +5022,26 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !19 +// CHECK4-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !19 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !19 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !19 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5102,24 +5102,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z9gtid_testv() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !22 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: 
omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5243,23 +5243,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !25 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !25 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5320,24 +5320,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn4v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: call void @_Z3fn4v(), !llvm.access.group !28 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5416,7 +5416,7 @@ int main() { // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5494,7 +5494,7 @@ int main() { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5575,41 +5575,41 @@ int main() { // CHECK4: omp_if.then: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK4-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK4-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* 
[[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !34 // CHECK4-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then5: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK4-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], 
i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK4: omp_if.else6: @@ -5649,7 +5649,7 @@ int main() { // CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK4-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK4: omp.inner.for.end21: // CHECK4-NEXT: br label [[OMP_IF_END22]] // CHECK4: omp_if.end22: @@ -5719,24 +5719,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5775,7 +5775,7 @@ int main() { // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK4-NEXT: br 
label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -5847,24 +5847,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn6v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// 
CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: @@ -5903,7 +5903,7 @@ int main() { // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK4-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK4: omp.inner.for.end18: // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: @@ -6020,23 +6020,23 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6097,24 +6097,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn1v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6193,7 +6193,7 @@ int main() { // CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP52:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6271,7 +6271,7 @@ int main() { // CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6344,35 +6344,35 @@ int main() { // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !54 // CHECK4-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK4-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK4: omp_if.then: -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END:%.*]] // CHECK4: omp_if.else: -// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK4-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !54 +// CHECK4-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !54 +// CHECK4-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !54 // CHECK4-NEXT: br label [[OMP_IF_END]] // CHECK4: omp_if.end: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !54 // CHECK4-NEXT: [[ADD:%.*]] = 
add nsw i32 [[TMP13]], [[TMP14]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !54 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP55:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6433,24 +6433,24 @@ int main() { // CHECK4-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: call void @_Z3fn3v() +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: call void @_Z3fn3v(), !llvm.access.group !57 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 // 
CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !57 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP58:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7755,27 +7755,27 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK9-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], 
align 8 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK9-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7839,24 +7839,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK9-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7916,26 +7916,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK9-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -7996,24 +7996,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z9gtid_testv() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK9-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8137,23 +8137,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8214,24 +8214,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] 
// CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn4v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8291,26 +8291,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle 
i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK9-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 
[[TMP12]], [[TMP13]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8371,24 +8371,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn5v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK9-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8461,35 +8461,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, 
i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK9-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 
// CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8550,24 +8550,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn6v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !43 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8680,23 +8680,23 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8757,24 +8757,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn1v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8834,26 +8834,26 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK9-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// 
CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -8914,24 +8914,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn2v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 
[[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9004,35 +9004,35 @@ int main() { // CHECK9-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK9-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK9-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK9-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK9-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK9: omp_if.then: -// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, 
...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK9-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END:%.*]] // CHECK9: omp_if.else: -// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK9-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK9-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK9-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK9-NEXT: br label [[OMP_IF_END]] // CHECK9: omp_if.end: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK9-NEXT: 
[[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9093,24 +9093,24 @@ int main() { // CHECK9-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: call void @_Z3fn3v() +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
!61 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -9229,27 +9229,27 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK10-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load 
i64, i64* [[ARG_CASTED]], align 8 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK10-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9313,24 +9313,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: store i32 0, i32* [[CONV]], align 8, !llvm.access.group !17 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK10-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9390,26 +9390,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: 
omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !22 +// CHECK10-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !22 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !22 // CHECK10-NEXT: br label 
[[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9470,24 +9470,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z9gtid_testv() +// CHECK10-NEXT: 
store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !25 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9611,23 +9611,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: 
[[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !28 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9688,24 +9688,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn4v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: call void @_Z3fn4v(), !llvm.access.group !31 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9765,26 +9765,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !34 +// CHECK10-NEXT: call void @.omp_outlined..7(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !34 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !34 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* 
[[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9845,24 +9845,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn5v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: call void @_Z3fn5v(), !llvm.access.group !37 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -9935,35 +9935,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* 
[[CONV]], align 8 +// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !40 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !40 +// CHECK10-NEXT: call void @.omp_outlined..9(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !40 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !40 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: 
-// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10024,24 +10024,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn6v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !43 
+// CHECK10-NEXT: call void @_Z3fn6v(), !llvm.access.group !43 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10154,23 +10154,23 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, 
!llvm.access.group !46 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..13 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !46 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10231,24 +10231,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// 
CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn1v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: call void @_Z3fn1v(), !llvm.access.group !49 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10308,26 +10308,26 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !52 +// CHECK10-NEXT: call void @.omp_outlined..15(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !52 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !52 // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10388,24 +10388,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn2v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: call void @_Z3fn2v(), !llvm.access.group !55 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br 
label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10478,35 +10478,35 @@ int main() { // CHECK10-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK10-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK10-NEXT: 
[[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK10-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK10-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK10: omp_if.then: -// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK10-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..17 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END:%.*]] // CHECK10: omp_if.else: -// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK10-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK10-NEXT: call void @.omp_outlined..17(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK10-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK10-NEXT: br label [[OMP_IF_END]] // CHECK10: omp_if.end: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP13:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10567,24 +10567,24 @@ int main() { // CHECK10-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: call void @_Z3fn3v() +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: call void 
@_Z3fn3v(), !llvm.access.group !61 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -10703,27 +10703,27 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // 
CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !12 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 // CHECK11-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK11-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10787,24 +10787,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !18 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !12 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10864,26 +10864,26 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// 
CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK11-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -10944,24 +10944,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z9gtid_testv() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], 
align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11085,23 +11085,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11162,24 +11162,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !32 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn4v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11258,7 +11258,7 @@ int main() { // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11336,7 +11336,7 @@ int main() { // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP24:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11417,41 +11417,41 @@ int main() { // CHECK11: omp_if.then: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK11-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK11-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, 
!llvm.access.group !38 +// CHECK11-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK11-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then5: -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK11-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* 
@[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK11: omp_if.else6: @@ -11491,7 +11491,7 @@ int main() { // CHECK11-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK11-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK11: omp.inner.for.end21: // CHECK11-NEXT: br label [[OMP_IF_END22]] // CHECK11: omp_if.end22: @@ -11561,24 +11561,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11617,7 +11617,7 @@ int main() { // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK11: 
omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11689,24 +11689,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn6v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK11-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP47:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: @@ -11745,7 +11745,7 @@ int main() { // CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK11-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK11: omp.inner.for.end18: // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: @@ -11862,23 +11862,23 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -11939,24 +11939,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // 
CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn1v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12035,7 +12035,7 @@ int main() { // CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12113,7 +12113,7 @@ int 
main() { // CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12186,35 +12186,35 @@ int main() { // CHECK11-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK11-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK11-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK11-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK11: omp_if.then: -// 
CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK11-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END:%.*]] // CHECK11: omp_if.else: -// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK11-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK11-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK11-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK11-NEXT: br label [[OMP_IF_END]] // CHECK11: omp_if.end: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: [[TMP14:%.*]] = 
load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12275,24 +12275,24 @@ int main() { // CHECK11-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: call void @_Z3fn3v() +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -12411,27 +12411,27 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !12 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !13 // 
CHECK12-NEXT: [[CONV2:%.*]] = bitcast i64* [[ARG_CASTED]] to i32* -// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]) +// CHECK12-NEXT: store i32 [[TMP11]], i32* [[CONV2]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP12:%.*]] = load i64, i64* [[ARG_CASTED]], align 8, !llvm.access.group !13 +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]], i64 [[TMP12]]), !llvm.access.group !13 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12495,24 +12495,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* 
[[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !12 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: store i32 0, i32* [[CONV]], align 8, !nontemporal !14, !llvm.access.group !18 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12572,26 +12572,26 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !23 +// CHECK12-NEXT: call void @.omp_outlined..3(i32* [[TMP11]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 
[[TMP10]]) #[[ATTR2]], !llvm.access.group !23 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !23 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12652,24 +12652,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[MUL:%.*]] = 
mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z9gtid_testv() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: call void @_Z9gtid_testv(), !llvm.access.group !26 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12793,23 +12793,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..5 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !29 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12870,24 +12870,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: 
omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn4v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: call void @_Z3fn4v(), !llvm.access.group !32 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -12966,7 +12966,7 @@ int main() { // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // 
CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13044,7 +13044,7 @@ int main() { // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13125,41 +13125,41 @@ int main() { // CHECK12: omp_if.then: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !38 // 
CHECK12-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP12:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL2:%.*]] = trunc i8 [[TMP12]] to i1 // CHECK12-NEXT: [[CONV3:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i8* // CHECK12-NEXT: [[FROMBOOL:%.*]] = zext i1 [[TOBOOL2]] to i8 -// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1 -// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: store i8 [[FROMBOOL]], i8* [[CONV3]], align 1, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP14:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !38 // CHECK12-NEXT: [[TOBOOL4:%.*]] = trunc i8 [[TMP14]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL4]], label [[OMP_IF_THEN5:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then5: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !38 +// CHECK12-NEXT: call void @.omp_outlined..9(i32* [[TMP15]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP9]], i64 [[TMP11]], i64 [[TMP13]]) #[[ATTR2]], !llvm.access.group !38 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !38 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !38 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], [[TMP17]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK12-NEXT: store i32 
[[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END22:%.*]] // CHECK12: omp_if.else6: @@ -13199,7 +13199,7 @@ int main() { // CHECK12-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] // CHECK12-NEXT: store i32 [[ADD20]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND7]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK12: omp.inner.for.end21: // CHECK12-NEXT: br label [[OMP_IF_END22]] // CHECK12: omp_if.end22: @@ -13269,24 +13269,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !42 // 
CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13325,7 +13325,7 @@ int main() { // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13397,24 +13397,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: 
omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn6v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: call void @_Z3fn6v(), !llvm.access.group !46 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 // CHECK12-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: @@ -13453,7 +13453,7 @@ int main() { // CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP20]], 1 // CHECK12-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK12: omp.inner.for.end18: // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: @@ -13570,23 +13570,23 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..14 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !50 // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !50 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP11]], [[TMP12]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13647,24 +13647,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !53 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn1v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: call void @_Z3fn1v(), !llvm.access.group !53 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13743,7 +13743,7 @@ int main() { // CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] // CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13821,7 +13821,7 @@ int main() { // CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 // CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP34:![0-9]+]] +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13894,35 +13894,35 @@ int main() { // CHECK12-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP8:%.*]] = zext i32 [[TMP7]] to i64 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK12-NEXT: [[TMP10:%.*]] = zext i32 [[TMP9]] to i64 -// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8 +// CHECK12-NEXT: [[TMP11:%.*]] = load i8, i8* [[CONV]], align 8, !llvm.access.group !58 // CHECK12-NEXT: [[TOBOOL:%.*]] = trunc i8 [[TMP11]] to i1 // CHECK12-NEXT: br i1 [[TOBOOL]], label [[OMP_IF_THEN:%.*]], label [[OMP_IF_ELSE:%.*]] // CHECK12: omp_if.then: -// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]) +// CHECK12-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP8]], i64 [[TMP10]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END:%.*]] // CHECK12: omp_if.else: -// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) -// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8 -// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]] -// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]) +// CHECK12-NEXT: call void @__kmpc_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8, !llvm.access.group !58 +// CHECK12-NEXT: call void @.omp_outlined..18(i32* [[TMP12]], i32* [[DOTBOUND_ZERO_ADDR]], i64 [[TMP8]], i64 [[TMP10]]) #[[ATTR2]], !llvm.access.group !58 +// CHECK12-NEXT: call void @__kmpc_end_serialized_parallel(%struct.ident_t* @[[GLOB3]], i32 [[TMP1]]), !llvm.access.group !58 // CHECK12-NEXT: br label [[OMP_IF_END]] // CHECK12: omp_if.end: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // 
CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP13]], [[TMP14]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -13983,24 +13983,24 @@ int main() { // CHECK12-NEXT: store i32 [[TMP6]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: call void @_Z3fn3v() +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: call void @_Z3fn3v(), !llvm.access.group !61 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp index fffec86c7097b..411dd6de11286 100644 --- a/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -378,23 +378,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -458,28 +458,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 
4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -543,23 +543,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -623,28 +623,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -708,23 +708,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -800,28 +800,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -895,23 +895,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -972,28 +972,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1058,23 +1058,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1135,28 +1135,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1363,23 +1363,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1443,28 +1443,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1528,23 +1528,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1608,28 +1608,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1693,23 +1693,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1785,28 +1785,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1880,23 +1880,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1957,28 +1957,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2043,23 +2043,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2120,28 +2120,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2348,21 +2348,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2424,27 +2424,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2508,21 +2508,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2584,27 +2584,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2668,21 +2668,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2753,27 +2753,27 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2847,21 +2847,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2920,27 +2920,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3005,21 +3005,21 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: 
-// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3078,27 +3078,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3305,21 +3305,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: 
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3381,27 +3381,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3465,21 +3465,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3541,27 +3541,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3625,21 +3625,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3710,27 +3710,27 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3804,21 +3804,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3877,27 +3877,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3962,21 +3962,21 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: 
-// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4035,27 +4035,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4262,23 +4262,23 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: 
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4342,28 +4342,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4427,23 +4427,23 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4507,28 +4507,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4592,23 +4592,23 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4684,28 +4684,28 @@ int main (int argc, char **argv) { // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4779,23 +4779,23 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4856,28 +4856,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4942,23 +4942,23 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5019,28 +5019,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -5247,23 +5247,23 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5327,28 +5327,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5412,23 +5412,23 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5492,28 +5492,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5577,23 +5577,23 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK6-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK6-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5669,28 +5669,28 @@ int main (int argc, char **argv) { // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5764,23 +5764,23 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5841,28 +5841,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5927,23 +5927,23 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -6004,28 +6004,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6232,21 +6232,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6308,27 +6308,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6392,21 +6392,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6468,27 +6468,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6552,21 +6552,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6637,27 +6637,27 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6731,21 +6731,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6804,27 +6804,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6889,21 +6889,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: 
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6962,27 +6962,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -7189,21 +7189,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: 
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7265,27 +7265,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7349,21 +7349,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7425,27 +7425,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7509,21 +7509,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7594,27 +7594,27 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7688,21 +7688,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7761,27 +7761,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7846,21 +7846,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: 
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7919,27 +7919,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -9203,27 +9203,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: 
omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9320,27 +9320,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9449,27 +9449,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9566,27 +9566,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9708,55 +9708,55 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK13-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK13-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK13-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK13-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK13-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK13: cond.true14: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[COND_END16:%.*]] // CHECK13: cond.false15: -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[COND_END16]] // CHECK13: cond.end16: // CHECK13-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK13-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store 
i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK13-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9856,27 +9856,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK13-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK13-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9985,27 +9985,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP16:%.*]] = 
zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK13-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10099,27 +10099,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// 
CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -10239,31 +10239,31 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK13-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 -// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK13-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 // CHECK13-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], 
align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10361,27 +10361,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !26 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -10606,23 +10606,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10686,27 +10686,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10770,23 +10770,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10850,27 +10850,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10946,27 +10946,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11046,27 +11046,27 @@ int main (int argc, char **argv) { // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11140,23 +11140,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11217,27 +11217,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11314,27 +11314,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: 
omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11399,27 +11399,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11913,27 +11913,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, 
void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12030,27 +12030,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12159,27 +12159,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12276,27 +12276,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12418,55 +12418,55 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK14-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP21:%.*]] = 
load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK14-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK14-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK14-NEXT: store i32 [[ADD12]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK14-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK14-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK14: cond.true14: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[COND_END16:%.*]] // CHECK14: cond.false15: -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[COND_END16]] // CHECK14: cond.end16: // CHECK14-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK14-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK14-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12566,27 +12566,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK14-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12695,27 +12695,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK14-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 
-// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12809,27 +12809,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // 
CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // 
CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12949,31 +12949,31 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK14-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK14-NEXT: store i32 
[[TMP19]], i32* [[CONV8]], align 4 -// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK14-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 // CHECK14-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13071,27 +13071,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: 
omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !26 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13316,23 +13316,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13396,27 +13396,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13480,23 +13480,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13560,27 +13560,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13656,27 +13656,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13756,27 +13756,27 @@ int main (int argc, char **argv) { // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13850,23 +13850,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13927,27 +13927,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -14024,27 +14024,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: 
omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -14109,27 +14109,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -14617,24 +14617,24 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: 
omp.loop.exit: @@ -14728,26 +14728,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -14853,24 +14853,24 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: 
omp.loop.exit: @@ -14964,26 +14964,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15099,51 +15099,51 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK15-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK15-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK15: cond.true11: -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK15-NEXT: br label [[COND_END13:%.*]] // CHECK15: cond.false12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK15-NEXT: br label [[COND_END13]] // CHECK15: cond.end13: // CHECK15-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK15-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15239,26 +15239,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// 
CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15364,24 +15364,24 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: 
omp.loop.exit: @@ -15472,26 +15472,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK15-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -15605,27 +15605,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15719,26 +15719,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: 
omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -15961,21 +15961,21 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: 
omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16037,26 +16037,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16120,21 +16120,21 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16196,26 +16196,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16288,24 +16288,24 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16379,26 +16379,26 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -16472,21 +16472,21 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16545,26 +16545,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -16638,24 +16638,24 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16717,26 +16717,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -17224,24 +17224,24 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17335,26 +17335,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17460,24 +17460,24 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17571,26 +17571,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17706,51 +17706,51 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK16-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK16-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK16: cond.true11: -// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK16-NEXT: br label [[COND_END13:%.*]] // CHECK16: cond.false12: -// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK16-NEXT: br label [[COND_END13]] // CHECK16: cond.end13: // CHECK16-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK16-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK16-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17846,26 +17846,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// 
CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17971,24 +17971,24 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: 
omp.loop.exit: @@ -18079,26 +18079,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK16-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK16-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18212,27 +18212,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18326,26 +18326,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: 
omp.inner.for.body: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18568,21 +18568,21 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: 
omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18644,26 +18644,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18727,21 +18727,21 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18803,26 +18803,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18895,24 +18895,24 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18986,26 +18986,26 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.body: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK16-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19079,21 +19079,21 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -19152,26 +19152,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19245,24 +19245,24 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -19324,26 +19324,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19837,27 +19837,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19954,27 +19954,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20083,27 +20083,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20200,27 +20200,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20342,55 +20342,55 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// 
CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK17-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// 
CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK17-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK17-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK17: cond.true14: -// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK17-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK17-NEXT: br label [[COND_END16:%.*]] // CHECK17: cond.false15: -// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK17-NEXT: br label [[COND_END16]] // CHECK17: cond.end16: // CHECK17-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK17-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store 
i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK17-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20490,27 +20490,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK17-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK17-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK17-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK17-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20619,27 +20619,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP16:%.*]] 
= zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK17-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20733,27 +20733,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -20873,31 +20873,31 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK17-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 // CHECK17-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], 
align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK17-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20995,27 +20995,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK17-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK17-NEXT: 
[[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21240,23 +21240,23 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21320,27 +21320,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21404,23 +21404,23 @@ int main (int argc, char **argv) { // CHECK17-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21484,27 +21484,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21580,27 +21580,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21680,27 +21680,27 @@ int main (int argc, char **argv) { // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21774,23 +21774,23 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21851,27 +21851,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21948,27 +21948,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: 
omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK17-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK17-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -22033,27 +22033,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -22547,27 +22547,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !13 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, 
void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !13 +// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !13 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !13 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22664,27 +22664,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: 
br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22793,27 +22793,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !22 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !22 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !22 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22910,27 +22910,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !25 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !25 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK18-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23052,55 +23052,55 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[CMP7:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4 -// CHECK18-NEXT: [[TMP21:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[TMP20]], i32* [[CONV8]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP21:%.*]] = 
load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !28 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4 -// CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]) +// CHECK18-NEXT: store i32 [[TMP22]], i32* [[CONV9]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP23:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i64 [[TMP21]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP23]]), !llvm.access.group !28 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: store i32 [[ADD11]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[ADD12:%.*]] = add nsw i32 [[TMP28]], [[TMP29]] -// CHECK18-NEXT: store i32 [[ADD12]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK18-NEXT: store i32 [[ADD12]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK18-NEXT: [[CMP13:%.*]] = icmp sgt i32 [[TMP30]], [[TMP31]] // CHECK18-NEXT: br i1 [[CMP13]], label [[COND_TRUE14:%.*]], label [[COND_FALSE15:%.*]] // CHECK18: cond.true14: -// CHECK18-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4 +// CHECK18-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_3]], align 4, !llvm.access.group !28 // CHECK18-NEXT: br label [[COND_END16:%.*]] // CHECK18: cond.false15: -// CHECK18-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK18-NEXT: br label [[COND_END16]] // CHECK18: cond.end16: // CHECK18-NEXT: [[COND17:%.*]] = phi i32 [ [[TMP32]], [[COND_TRUE14]] ], [ [[TMP33]], [[COND_FALSE15]] ] -// CHECK18-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK18-NEXT: store i32 [[COND17]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: [[TMP34:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: store i32 [[TMP34]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23200,27 +23200,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK18-NEXT: [[CMP9:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK18-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !31 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !31 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK18-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23329,27 +23329,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !34 // CHECK18-NEXT: [[CONV6:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4 
-// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]) +// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV6]], align 4, !llvm.access.group !34 +// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !34 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !34 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23443,27 +23443,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // 
CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !22 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !37 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // 
CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23583,31 +23583,31 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP16:%.*]] = zext i32 [[TMP15]] to i64 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK18-NEXT: [[CONV8:%.*]] = bitcast i64* [[N_CASTED]] to i32* -// CHECK18-NEXT: store i32 
[[TMP19]], i32* [[CONV8]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[TMP19]], i32* [[CONV8]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP20:%.*]] = load i64, i64* [[N_CASTED]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV1]], align 8, !llvm.access.group !40 // CHECK18-NEXT: [[CONV9:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4 -// CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]) +// CHECK18-NEXT: store i32 [[TMP21]], i32* [[CONV9]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i64, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP16]], i64 [[TMP18]], i64 [[TMP20]], i64 [[TMP0]], i32* [[TMP1]], i64 [[TMP22]]), !llvm.access.group !40 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23705,27 +23705,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK18-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK18-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: 
omp.inner.for.body: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !26 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I7]], align 4, !llvm.access.group !43 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I7]], align 4, !llvm.access.group !43 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23950,23 +23950,23 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24030,27 +24030,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !49 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24114,23 +24114,23 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24194,27 +24194,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24290,27 +24290,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24390,27 +24390,27 @@ int main (int argc, char **argv) { // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -24484,23 +24484,23 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24561,27 +24561,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -24658,27 +24658,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: 
omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK18-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK18-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK18-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24743,27 +24743,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -25251,24 +25251,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: 
omp.loop.exit: @@ -25362,26 +25362,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25487,24 +25487,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: 
omp.loop.exit: @@ -25598,26 +25598,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25733,51 +25733,51 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK19-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: // CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25873,26 +25873,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// 
CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25998,24 +25998,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: 
omp.loop.exit: @@ -26106,26 +26106,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -26239,27 +26239,27 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26353,26 +26353,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -26595,21 +26595,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: 
omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26671,26 +26671,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26754,21 +26754,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26830,26 +26830,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26922,24 +26922,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -27013,26 +27013,26 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -27106,21 +27106,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -27179,26 +27179,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -27272,24 +27272,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -27351,26 +27351,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -27858,24 +27858,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !14 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -27969,26 +27969,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28094,24 +28094,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !23 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28205,26 +28205,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28340,51 +28340,51 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP15]], 1 // CHECK20-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP14]], [[ADD]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]) +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: store i32 [[TMP18]], i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: store i32 [[TMP20]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32 [[TMP19]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP21]]), !llvm.access.group !29 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP24]], [[TMP25]] -// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP26]], [[TMP27]] -// CHECK20-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP28]], [[TMP29]] // CHECK20-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK20: cond.true11: -// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: br label [[COND_END13:%.*]] // CHECK20: cond.false12: -// CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK20-NEXT: br label [[COND_END13]] // CHECK20: cond.end13: // CHECK20-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP30]], [[COND_TRUE11]] ], [ [[TMP31]], [[COND_FALSE12]] ] -// CHECK20-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK20-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: store i32 [[TMP32]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28480,26 +28480,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP18]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// 
CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28605,24 +28605,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]) +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]]), !llvm.access.group !35 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP19]], [[TMP20]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: 
omp.loop.exit: @@ -28713,26 +28713,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -28846,27 +28846,27 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void 
(%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]) +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[N_ADDR]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 [[TMP17]], i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[N_CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP15]], i32 [[TMP16]], i32 [[TMP18]], i32 [[TMP0]], i32* [[TMP1]], i32 [[TMP20]]), !llvm.access.group !41 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28960,26 +28960,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: 
omp.inner.for.body: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP20]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -29202,21 +29202,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: 
omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29278,26 +29278,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29361,21 +29361,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29437,26 +29437,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29529,24 +29529,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29620,26 +29620,26 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -29713,21 +29713,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29786,26 +29786,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -29879,24 +29879,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29958,26 +29958,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: diff --git a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp index 706db313d3121..d55453261aa51 100644 --- 
a/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_codegen.cpp @@ -679,23 +679,23 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -769,25 +769,25 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK1-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 +// CHECK1-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK1-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK1-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -801,16 +801,16 @@ int bar(int n){ // CHECK1: omp_offload.failed.i: // CHECK1-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK1-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !26 +// CHECK1-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK1-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !26 +// CHECK1-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !26 // CHECK1-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK1-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK1-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK1-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !26 +// CHECK1-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !26 // CHECK1-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i64 [[TMP29]], i64 [[TMP31]], i64 [[TMP33]]) #[[ATTR3]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK1: .omp_outlined..1.exit: @@ -890,7 +890,7 @@ int bar(int n){ // CHECK1-NEXT: [[TMP9:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK1-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -959,28 +959,28 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK1-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK1-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK1-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !29 
// CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1060,31 +1060,31 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1203,59 +1203,59 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK1-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK1-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK1-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] 
to double // CHECK1-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK1-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK1-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK1-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK1-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK1-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK1-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK1-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK1-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK1-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK1-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK1-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK1-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK1-NEXT: store i64 [[ADD20]], 
i64* [[X]], align 8 +// CHECK1-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK1-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK1-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK1-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK1-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK1-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1684,37 +1684,37 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: 
[[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK1-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: store double [[ADD5]], double* [[A]], align 8 +// CHECK1-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 +// CHECK1-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK1-NEXT: store double [[INC]], double* [[A6]], align 8 +// CHECK1-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !38 // CHECK1-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK1-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK1-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK1-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK1-NEXT: store 
i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1848,42 +1848,42 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK1-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK1-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: 
[[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK1-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK1-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK1-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK1-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK1-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK1-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK1-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK1-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK1-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[ADD20]], i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK1-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1982,35 +1982,35 @@ int bar(int n){ // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 
4, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK1-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK1-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK1-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK1-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK1-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK1: omp.inner.for.end: // 
CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -2418,23 +2418,23 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2508,25 
+2508,25 @@ int bar(int n){ // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK2-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 +// CHECK2-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK2-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK2-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -2540,16 +2540,16 @@ int bar(int n){ // CHECK2: omp_offload.failed.i: // CHECK2-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK2-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !26 +// CHECK2-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !26 // CHECK2-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK2-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !26 +// CHECK2-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !26 // CHECK2-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK2-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK2-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK2-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !26 +// CHECK2-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !26 // CHECK2-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i64 [[TMP29]], i64 [[TMP31]], i64 [[TMP33]]) #[[ATTR3]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK2: .omp_outlined..1.exit: @@ -2629,7 +2629,7 @@ int bar(int n){ // CHECK2-NEXT: [[TMP9:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK2-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2698,28 +2698,28 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !29 // CHECK2-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK2-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK2-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, 
!llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2799,31 +2799,31 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 
+// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !32 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2942,59 +2942,59 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// 
CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK2-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK2-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK2-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK2-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CONV11:%.*]] = fpext 
float [[TMP18]] to double // CHECK2-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK2-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK2-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK2-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK2-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK2-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK2-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK2-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK2-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK2-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK2-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK2-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK2-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK2-NEXT: store 
i64 [[ADD20]], i64* [[X]], align 8 +// CHECK2-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK2-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK2-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !35 // CHECK2-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK2-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK2-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK2-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK2-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3423,37 +3423,37 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 
// CHECK2-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK2-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: store double [[ADD5]], double* [[A]], align 8 +// CHECK2-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 +// CHECK2-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK2-NEXT: store double [[INC]], double* [[A6]], align 8 +// CHECK2-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !38 // CHECK2-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK2-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK2-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK2-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// 
CHECK2-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3587,42 +3587,42 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK2-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK2-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group 
!41 // CHECK2-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK2-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK2-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK2-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK2-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !41 +// CHECK2-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK2-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK2-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK2-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !41 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK2-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 
[[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 // CHECK2-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK2-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -3721,35 +3721,35 @@ int bar(int n){ // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK2-NEXT: store i32 [[ADD]], 
i32* [[I]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !44 +// CHECK2-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK2-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK2-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK2-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK2-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !44 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK2-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK2: 
omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -4150,23 +4150,23 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: @@ -4240,25 +4240,25 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK3-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK3-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK3-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -4272,14 +4272,14 @@ int bar(int n){ // CHECK3: omp_offload.failed.i: // CHECK3-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK3-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK3-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !27 +// CHECK3-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK3-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 // CHECK3-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK3-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK3-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 +// CHECK3-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 // CHECK3-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i32 [[TMP29]], i32 [[TMP31]], i32 [[TMP33]]) #[[ATTR3]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK3: .omp_outlined..1.exit: @@ -4356,7 +4356,7 @@ int bar(int n){ // CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // 
CHECK3-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4425,28 +4425,28 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK3-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // 
CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK3-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4523,31 +4523,31 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // 
CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -4663,59 +4663,59 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // 
CHECK3-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK3-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK3-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK3-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK3-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK3-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 
// CHECK3-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK3-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK3-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK3-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK3-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK3-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK3-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK3-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK3-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK3-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 // CHECK3-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK3-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK3-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK3-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, 
!llvm.access.group !36 // CHECK3-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK3-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK3-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK3-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK3-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK3-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5137,37 +5137,37 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP3]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK3-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: store double [[ADD4]], double* [[A]], align 4 +// CHECK3-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 +// CHECK3-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK3-NEXT: store double [[INC]], double* [[A5]], align 4 +// CHECK3-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK3-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK3-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK3-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 // 
CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK3-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5295,42 +5295,42 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK3-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK3-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK3-NEXT: [[ADD10:%.*]] = add i32 
[[TMP18]], [[MUL]] -// CHECK3-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK3-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK3-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK3-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK3-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK3-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK3-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK3-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK3-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -5426,35 +5426,35 @@ int bar(int n){ // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, 
i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK3-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK3-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK3-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: 
omp.loop.exit: @@ -5855,23 +5855,23 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -5945,25 +5945,25 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK4-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 // CHECK4-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK4-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK4-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -5977,14 +5977,14 @@ int bar(int n){ // CHECK4: omp_offload.failed.i: // CHECK4-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK4-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK4-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !27 +// CHECK4-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !27 // CHECK4-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK4-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 // CHECK4-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK4-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK4-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 +// CHECK4-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 // CHECK4-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i32 [[TMP29]], i32 [[TMP31]], i32 [[TMP33]]) #[[ATTR3]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK4: .omp_outlined..1.exit: @@ -6061,7 +6061,7 @@ int bar(int n){ // CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // 
CHECK4-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6130,28 +6130,28 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK4-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // 
CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK4-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6228,31 +6228,31 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !33 // 
CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6368,59 +6368,59 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // 
CHECK4-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK4-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK4-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK4-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK4-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK4-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 
// CHECK4-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK4-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK4-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK4-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK4-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK4-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK4-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK4-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK4-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK4-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK4-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK4-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !36 // CHECK4-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK4-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK4-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK4-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, 
!llvm.access.group !36 // CHECK4-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK4-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK4-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK4-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK4-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK4-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !36 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK4-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -6842,37 +6842,37 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP3]], 
label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK4-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: store double [[ADD4]], double* [[A]], align 4 +// CHECK4-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 +// CHECK4-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK4-NEXT: store double [[INC]], double* [[A5]], align 4 +// CHECK4-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK4-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK4-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK4-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK4-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !39 // 
CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK4-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7000,42 +7000,42 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK4-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK4-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK4-NEXT: [[ADD10:%.*]] = add i32 
[[TMP18]], [[MUL]] -// CHECK4-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK4-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK4-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK4-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK4-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK4-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK4-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !42 // CHECK4-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 // CHECK4-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !42 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP43:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -7131,35 +7131,35 @@ int bar(int n){ // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, 
i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK4-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK4-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK4-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK4-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: 
omp.loop.exit: @@ -7567,23 +7567,23 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -7657,25 +7657,25 @@ int bar(int n){ // CHECK5-NEXT: [[TMP9:%.*]] = 
getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK5-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK5-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK5-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK5-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK5-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 +// CHECK5-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK5-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK5-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -7689,16 +7689,16 @@ int bar(int n){ // CHECK5: omp_offload.failed.i: // CHECK5-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK5-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK5-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK5-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK5-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !26 +// CHECK5-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK5-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK5-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK5-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK5-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !26 +// CHECK5-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !26 // CHECK5-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK5-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK5-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK5-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK5-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !26 +// CHECK5-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !26 // CHECK5-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i64 [[TMP29]], i64 [[TMP31]], i64 [[TMP33]]) #[[ATTR3]] // CHECK5-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK5: .omp_outlined..1.exit: @@ -7767,10 +7767,10 @@ int bar(int n){ // CHECK5-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !24 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !24 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !27 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !27 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !24 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -7778,7 +7778,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK5-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -7847,28 +7847,28 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// 
CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 // CHECK5-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK5-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK5-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -7948,31 +7948,31 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 
+// CHECK5-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 
-// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -8091,59 +8091,59 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK5-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, 
i64 2 -// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK5-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK5-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK5-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK5-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK5-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK5-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK5-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK5-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK5-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK5-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK5-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // 
CHECK5-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK5-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK5-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK5-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK5-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK5-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK5-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK5-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK5-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !36 // CHECK5-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK5-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK5-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK5-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK5-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -8611,37 +8611,37 @@ int bar(int n){ // CHECK5: omp_if.then: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK5-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 // CHECK5-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK5-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: store double [[ADD6]], double* [[A]], align 8 +// CHECK5-NEXT: store double [[ADD6]], double* [[A]], align 8, !llvm.access.group !39 // CHECK5-NEXT: [[A7:%.*]] = getelementptr 
inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8 +// CHECK5-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !39 // CHECK5-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK5-NEXT: store double [[INC]], double* [[A7]], align 8 +// CHECK5-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !39 // CHECK5-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 // CHECK5-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK5-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK5-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2 +// CHECK5-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_IF_END:%.*]] // CHECK5: omp_if.else: @@ -8677,7 +8677,7 @@ int bar(int n){ // CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK5-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop 
[[LOOP31:![0-9]+]] +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK5: omp.inner.for.end27: // CHECK5-NEXT: br label [[OMP_IF_END]] // CHECK5: omp_if.end: @@ -8813,42 +8813,42 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK5-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK5-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK5-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK5-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK5-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !44 +// 
CHECK5-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK5-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK5-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK5-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK5-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK5-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK5-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK5-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK5-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK5-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK5-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// 
CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -8947,35 +8947,35 @@ int bar(int n){ // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !47 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK5-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 // CHECK5-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK5-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK5-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK5-NEXT: store 
i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK5-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK5-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -9383,23 +9383,23 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br 
i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9473,25 +9473,25 @@ int bar(int n){ // CHECK6-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK6-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK6-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) // CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK6-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !26 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !26 // CHECK6-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK6-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK6-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !23 +// CHECK6-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 8, !noalias !26 +// CHECK6-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 8, !noalias !26 // CHECK6-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i64 0, i64 0 // CHECK6-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i64 0, i64 0 // CHECK6-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i64 0, i64 0 @@ -9505,16 +9505,16 @@ int bar(int n){ // CHECK6: omp_offload.failed.i: // CHECK6-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK6-NEXT: [[CONV_I:%.*]] = bitcast i64* [[AA_CASTED_I]] to i16* -// CHECK6-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !23 -// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !23 +// CHECK6-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !26 +// CHECK6-NEXT: [[TMP29:%.*]] = load i64, i64* [[AA_CASTED_I]], align 8, !noalias !26 // CHECK6-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 // CHECK6-NEXT: [[CONV4_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED_I]] to i32* -// CHECK6-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !23 -// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !23 +// CHECK6-NEXT: store i32 [[TMP30]], i32* [[CONV4_I]], align 4, !noalias !26 +// CHECK6-NEXT: [[TMP31:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED_I]], align 8, !noalias !26 // CHECK6-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 // CHECK6-NEXT: [[CONV6_I:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED5_I]] to i32* -// CHECK6-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !23 -// CHECK6-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !23 +// CHECK6-NEXT: store i32 [[TMP32]], i32* [[CONV6_I]], align 4, !noalias !26 +// CHECK6-NEXT: [[TMP33:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED5_I]], align 8, !noalias !26 // CHECK6-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i64 [[TMP29]], i64 [[TMP31]], i64 [[TMP33]]) #[[ATTR3]] // CHECK6-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK6: .omp_outlined..1.exit: @@ -9583,10 +9583,10 @@ int bar(int n){ // CHECK6-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !24 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !24 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !27 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !27 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !24 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !27 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -9594,7 +9594,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK6-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9663,28 +9663,28 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// 
CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !30 // CHECK6-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK6-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK6-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9764,31 +9764,31 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 
+// CHECK6-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 
-// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -9907,59 +9907,59 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK6-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, 
i64 2 -// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK6-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK6-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK6-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK6-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK6-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK6-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK6-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK6-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK6-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK6-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK6-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK6-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // 
CHECK6-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK6-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK6-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK6-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK6-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK6-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK6-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK6-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK6-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK6-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !36 // CHECK6-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK6-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK6-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK6-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK6-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK6-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br 
label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -10427,37 +10427,37 @@ int bar(int n){ // CHECK6: omp_if.then: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK6-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !39 // CHECK6-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK6-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: store double [[ADD6]], double* [[A]], align 8 +// CHECK6-NEXT: store double [[ADD6]], double* [[A]], align 8, !llvm.access.group !39 // CHECK6-NEXT: [[A7:%.*]] = 
getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8 +// CHECK6-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !39 // CHECK6-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK6-NEXT: store double [[INC]], double* [[A7]], align 8 +// CHECK6-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !39 // CHECK6-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 // CHECK6-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK6-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK6-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2 +// CHECK6-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !39 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_IF_END:%.*]] // CHECK6: omp_if.else: @@ -10493,7 +10493,7 @@ int bar(int n){ // CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK6-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND11]], 
!llvm.loop [[LOOP31:![0-9]+]] +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK6: omp.inner.for.end27: // CHECK6-NEXT: br label [[OMP_IF_END]] // CHECK6: omp_if.end: @@ -10629,42 +10629,42 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK6-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK6-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK6-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK6-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK6-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group 
!44 +// CHECK6-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK6-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK6-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK6-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !44 +// CHECK6-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK6-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK6-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK6-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !44 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK6-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK6-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK6-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// 
CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -10763,35 +10763,35 @@ int bar(int n){ // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !47 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !47 +// CHECK6-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !47 // CHECK6-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK6-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK6-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK6-NEXT: 
store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK6-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !47 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK6-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !47 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 // CHECK6-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -11192,23 +11192,23 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // 
CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -11282,25 +11282,25 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK7-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK7-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK7-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK7-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK7-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK7-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK7-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK7-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK7-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -11314,14 +11314,14 @@ int bar(int n){ // CHECK7: omp_offload.failed.i: // CHECK7-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK7-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK7-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK7-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !27 +// CHECK7-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK7-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK7-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 // CHECK7-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK7-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK7-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 +// CHECK7-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 // CHECK7-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i32 [[TMP29]], i32 [[TMP31]], i32 [[TMP33]]) #[[ATTR3]] // CHECK7-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK7: .omp_outlined..1.exit: @@ -11387,10 +11387,10 @@ int bar(int n){ // CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 
// CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !25 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !25 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !28 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !28 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !25 +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !28 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -11398,7 +11398,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK7-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -11467,28 +11467,28 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 // CHECK7-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK7-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK7-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -11565,31 +11565,31 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !34 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 
-// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -11705,59 +11705,59 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK7-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP17:%.*]] = load float, 
float* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK7-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK7-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK7-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK7-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK7-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK7-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK7-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK7-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK7-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 // CHECK7-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK7-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 // CHECK7-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK7-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK7-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, 
double* [[ARRAYIDX16]], i32 3 -// CHECK7-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK7-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 // CHECK7-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK7-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK7-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 // CHECK7-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK7-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK7-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK7-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK7-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK7-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK7-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK7-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK7-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !37 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK7-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// 
CHECK7-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -12218,37 +12218,37 @@ int bar(int n){ // CHECK7: omp_if.then: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK7-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK7-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: store double [[ADD5]], double* [[A]], align 4 +// CHECK7-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 
0 -// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK7-NEXT: store double [[INC]], double* [[A6]], align 4 +// CHECK7-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK7-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK7-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK7-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK7-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !40 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK7-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_IF_END:%.*]] // CHECK7: omp_if.else: @@ -12284,7 +12284,7 @@ int bar(int n){ // CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK7-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND10]], 
!llvm.loop [[LOOP43:![0-9]+]] // CHECK7: omp.inner.for.end26: // CHECK7-NEXT: br label [[OMP_IF_END]] // CHECK7: omp_if.end: @@ -12414,42 +12414,42 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK7-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK7-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK7-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK7-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK7-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, 
!llvm.access.group !45 // CHECK7-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK7-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK7-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK7-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK7-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK7-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK7-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK7-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK7-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK7-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] 
// CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -12545,35 +12545,35 @@ int bar(int n){ // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK7-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK7-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK7-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK7-NEXT: store i16 
[[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK7-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK7-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -12974,23 +12974,23 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -13064,25 +13064,25 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK8-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK8-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) // CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) -// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) 
-// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !24 +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK8-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK8-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 4, !noalias !27 // CHECK8-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i16**, [3 x i8*]**, [3 x i8*]**, [3 x i64]**)* // CHECK8-NEXT: call void [[TMP15]](i8* [[TMP14]], i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]]) #[[ATTR3]] -// CHECK8-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !24 +// CHECK8-NEXT: [[TMP16:%.*]] = load i16*, i16** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP17:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP18:%.*]] = load [3 x i8*]*, [3 x i8*]** [[DOTFIRSTPRIV_PTR_ADDR2_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP19:%.*]] = load [3 x i64]*, [3 x i64]** [[DOTFIRSTPRIV_PTR_ADDR3_I]], align 4, !noalias !27 // CHECK8-NEXT: [[TMP20:%.*]] = getelementptr inbounds [3 x i8*], [3 x i8*]* [[TMP17]], i32 0, i32 0 // CHECK8-NEXT: [[TMP21:%.*]] = getelementptr inbounds 
[3 x i8*], [3 x i8*]* [[TMP18]], i32 0, i32 0 // CHECK8-NEXT: [[TMP22:%.*]] = getelementptr inbounds [3 x i64], [3 x i64]* [[TMP19]], i32 0, i32 0 @@ -13096,14 +13096,14 @@ int bar(int n){ // CHECK8: omp_offload.failed.i: // CHECK8-NEXT: [[TMP28:%.*]] = load i16, i16* [[TMP16]], align 2 // CHECK8-NEXT: [[CONV_I:%.*]] = bitcast i32* [[AA_CASTED_I]] to i16* -// CHECK8-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !24 -// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !24 +// CHECK8-NEXT: store i16 [[TMP28]], i16* [[CONV_I]], align 2, !noalias !27 +// CHECK8-NEXT: [[TMP29:%.*]] = load i32, i32* [[AA_CASTED_I]], align 4, !noalias !27 // CHECK8-NEXT: [[TMP30:%.*]] = load i32, i32* [[TMP23]], align 4 -// CHECK8-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !24 +// CHECK8-NEXT: store i32 [[TMP30]], i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED_I]], align 4, !noalias !27 // CHECK8-NEXT: [[TMP32:%.*]] = load i32, i32* [[TMP24]], align 4 -// CHECK8-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 -// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !24 +// CHECK8-NEXT: store i32 [[TMP32]], i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 +// CHECK8-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED4_I]], align 4, !noalias !27 // CHECK8-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}__Z3fooi_l97(i32 [[TMP29]], i32 [[TMP31]], i32 [[TMP33]]) #[[ATTR3]] // CHECK8-NEXT: br label [[DOTOMP_OUTLINED__1_EXIT]] // CHECK8: .omp_outlined..1.exit: @@ -13169,10 +13169,10 @@ int bar(int n){ // CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 
// CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !25 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !25 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[A1]], align 4, !nontemporal !28 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A1]], align 4, !nontemporal !28 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !25 +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[A1]], align 4, !nontemporal !28 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] @@ -13180,7 +13180,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[TMP9]], 1 // CHECK8-NEXT: store i32 [[ADD4]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -13249,28 +13249,28 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !31 // CHECK8-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK8-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !31 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK8-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -13347,31 +13347,31 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !34 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK8-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 
-// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -13487,59 +13487,59 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK8-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP17:%.*]] = load float, 
float* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK8-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK8-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK8-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK8-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK8-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK8-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK8-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK8-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK8-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK8-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 // CHECK8-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK8-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK8-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !37 // CHECK8-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK8-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK8-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, 
double* [[ARRAYIDX16]], i32 3 -// CHECK8-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK8-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 // CHECK8-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK8-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK8-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !37 // CHECK8-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK8-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK8-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK8-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK8-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK8-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK8-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK8-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK8-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !37 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK8-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK8-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// 
CHECK8-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -14000,37 +14000,37 @@ int bar(int n){ // CHECK8: omp_if.then: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK8-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK8-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: store double [[ADD5]], double* [[A]], align 4 +// CHECK8-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 
0 -// CHECK8-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK8-NEXT: store double [[INC]], double* [[A6]], align 4 +// CHECK8-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK8-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK8-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK8-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK8-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !40 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 // CHECK8-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_IF_END:%.*]] // CHECK8: omp_if.else: @@ -14066,7 +14066,7 @@ int bar(int n){ // CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK8-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND10]], 
!llvm.loop [[LOOP43:![0-9]+]] // CHECK8: omp.inner.for.end26: // CHECK8-NEXT: br label [[OMP_IF_END]] // CHECK8: omp_if.end: @@ -14196,42 +14196,42 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK8-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK8-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK8-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK8-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK8-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, 
!llvm.access.group !45 // CHECK8-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK8-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK8-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK8-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK8-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK8-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK8-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK8-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !45 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 // CHECK8-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK8-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !45 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP46:![0-9]+]] 
// CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -14327,35 +14327,35 @@ int bar(int n){ // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK8-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK8-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK8-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK8-NEXT: store i16 
[[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK8-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !48 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 // CHECK8-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !48 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP49:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -18705,23 +18705,23 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label 
[[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -18790,28 +18790,28 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK17-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK17-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK17-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -18891,31 +18891,31 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// 
CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19034,59 +19034,59 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[CONV]], 
align 8, !llvm.access.group !24 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK17-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK17-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK17-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK17-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK17-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK17-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK17-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK17-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK17-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK17-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // 
CHECK17-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK17-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK17-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK17-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK17-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK17-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK17-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK17-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK17-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK17-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK17-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 // CHECK17-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK17-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK17-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK17-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK17-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19220,42 +19220,42 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK17-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK17-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 +// 
CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 // CHECK17-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK17-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 // CHECK17-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK17-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK17-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK17-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK17-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK17-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK17-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK17-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK17-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK17-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19361,37 +19361,37 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK17-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 // CHECK17-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK17-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 
// CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: store double [[ADD5]], double* [[A]], align 8 +// CHECK17-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !30 // CHECK17-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 +// CHECK17-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !30 // CHECK17-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK17-NEXT: store double [[INC]], double* [[A6]], align 8 +// CHECK17-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !30 // CHECK17-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK17-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK17-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK17-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK17-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !30 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK17: omp.loop.exit: @@ -19477,35 +19477,35 @@ int bar(int n){ // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK17-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK17-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK17-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK17-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 
// CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19584,23 +19584,23 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: 
omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -19669,28 +19669,28 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK18-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK18-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK18-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -19770,31 +19770,31 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], 
align 4 +// CHECK18-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: 
[[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -19913,59 +19913,59 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK18-NEXT: 
[[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK18-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK18-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK18-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK18-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK18-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK18-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK18-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK18-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK18-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK18-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK18-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] 
// CHECK18-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK18-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK18-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK18-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK18-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK18-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK18-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK18-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK18-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK18-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK18-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 // CHECK18-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK18-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK18-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK18-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK18-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: 
[[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK18-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -20099,42 +20099,42 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK18-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK18-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, 
!llvm.access.group !27 // CHECK18-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK18-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 // CHECK18-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK18-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK18-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK18-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK18-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK18-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK18-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK18-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK18-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK18-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK18-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -20240,37 +20240,37 @@ int bar(int n){ // CHECK18-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK18-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 // CHECK18-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP12]] to double // CHECK18-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds 
[[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: store double [[ADD5]], double* [[A]], align 8 +// CHECK18-NEXT: store double [[ADD5]], double* [[A]], align 8, !llvm.access.group !30 // CHECK18-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8 +// CHECK18-NEXT: [[TMP13:%.*]] = load double, double* [[A6]], align 8, !llvm.access.group !30 // CHECK18-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK18-NEXT: store double [[INC]], double* [[A6]], align 8 +// CHECK18-NEXT: store double [[INC]], double* [[A6]], align 8, !llvm.access.group !30 // CHECK18-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK18-NEXT: [[TMP14:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP14]] // CHECK18-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK18-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK18-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !30 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -20356,35 +20356,35 @@ int 
bar(int n){ // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !33 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !33 +// CHECK18-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK18-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK18-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK18-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !33 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr 
inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -20461,23 +20461,23 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -20546,28 +20546,28 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK19-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -20644,31 +20644,31 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP5:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// 
CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -20784,59 +20784,59 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK19-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK19-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK19-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK19-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK19-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK19-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK19-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK19-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK19-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK19-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK19-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK19-NEXT: [[ARRAYIDX16:%.*]] = getelementptr 
inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK19-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK19-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK19-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK19-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK19-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK19-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK19-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK19-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK19-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK19-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK19-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK19-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK19-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK19-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// 
CHECK19-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -20964,42 +20964,42 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK19-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK19-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK19-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD11:%.*]] 
= add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK19-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK19-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK19-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK19-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK19-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK19-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK19-NEXT: [[ADD19:%.*]] 
= add i32 [[TMP24]], 1 -// CHECK19-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -21102,37 +21102,37 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK19-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// 
CHECK19-NEXT: store double [[ADD4]], double* [[A]], align 4 +// CHECK19-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK19-NEXT: store double [[INC]], double* [[A5]], align 4 +// CHECK19-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK19-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK19-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK19-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK19-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !31 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -21215,35 +21215,35 @@ int bar(int n){ // CHECK19-NEXT: store i32 [[TMP5]], i32* 
[[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK19-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK19-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK19-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 
2 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -21320,23 +21320,23 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -21405,28 +21405,28 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK20-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -21503,31 +21503,31 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], 
i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -21643,59 +21643,59 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 
0, i32 2 -// CHECK20-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK20-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK20-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK20-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK20-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK20-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK20-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK20-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK20-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK20-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK20-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK20-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK20-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK20-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK20-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 
[[TMP20]] // CHECK20-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK20-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK20-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK20-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK20-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK20-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK20-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK20-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK20-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK20-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK20-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK20-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK20-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK20-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK20-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK20-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK20-NEXT: store i32 [[ADD23]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -21823,42 +21823,42 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK20-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK20-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK20-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK20-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// 
CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK20-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK20-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK20-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK20-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK20-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK20-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK20-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// 
CHECK20-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -21961,37 +21961,37 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[CONV:%.*]] = sitofp i32 [[TMP12]] to double // CHECK20-NEXT: [[ADD4:%.*]] = fadd double [[CONV]], 1.500000e+00 // CHECK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK20-NEXT: store double [[ADD4]], 
double* [[A]], align 4 +// CHECK20-NEXT: store double [[ADD4]], double* [[A]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[A5:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load double, double* [[A5]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[INC:%.*]] = fadd double [[TMP13]], 1.000000e+00 -// CHECK20-NEXT: store double [[INC]], double* [[A5]], align 4 +// CHECK20-NEXT: store double [[INC]], double* [[A5]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[CONV6:%.*]] = fptosi double [[INC]] to i16 // CHECK20-NEXT: [[TMP14:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP14]] // CHECK20-NEXT: [[ARRAYIDX7:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK20-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2 +// CHECK20-NEXT: store i16 [[CONV6]], i16* [[ARRAYIDX7]], align 2, !llvm.access.group !31 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP15]], 1 -// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -22074,35 +22074,35 @@ int bar(int n){ // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: 
br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK20-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK20-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK20-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK20-NEXT: [[TMP11:%.*]] = load 
i32, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !34 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -22181,23 +22181,23 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK21-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -22266,28 +22266,28 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // 
CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK21-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK21-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK21-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -22367,31 +22367,31 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP6:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK21-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -22510,59 +22510,59 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK21-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP17:%.*]] = load float, float* 
[[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK21-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK21-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK21-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK21-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK21-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK21-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK21-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK21-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK21-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK21-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK21-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK21-NEXT: [[ARRAYIDX18:%.*]] = 
getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK21-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK21-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK21-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK21-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK21-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK21-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK21-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK21-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 // CHECK21-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK21-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK21-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK21-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK21-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK21-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -22696,42 +22696,42 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK21-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK21-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK21-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK21-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 // CHECK21-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 
-// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK21-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 // CHECK21-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK21-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK21-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK21-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK21-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK21-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK21-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK21-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK21-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK21-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK21-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK21-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 
4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -22854,37 +22854,37 @@ int bar(int n){ // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK21-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK21-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !30 // CHECK21-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK21-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK21-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: store double [[ADD6]], double* [[A]], align 8 +// CHECK21-NEXT: store double [[ADD6]], double* [[A]], align 8, 
!llvm.access.group !30 // CHECK21-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8 +// CHECK21-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !30 // CHECK21-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK21-NEXT: store double [[INC]], double* [[A7]], align 8 +// CHECK21-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !30 // CHECK21-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 // CHECK21-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK21-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK21-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2 +// CHECK21-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !30 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK21-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: @@ -22920,7 +22920,7 @@ int bar(int n){ // CHECK21-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK21-NEXT: store i32 [[ADD26]], i32* 
[[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK21: omp.inner.for.end27: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -23008,35 +23008,35 @@ int bar(int n){ // CHECK21-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK21-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK21-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 // CHECK21-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // 
CHECK21-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK21-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK21-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK21-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK21: omp.loop.exit: @@ -23115,23 +23115,23 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// 
CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK22-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK22-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -23200,28 +23200,28 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK22-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK22-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK22-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK22-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK22-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8 +// CHECK22-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 8, !llvm.access.group !18 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -23301,31 +23301,31 @@ int bar(int n){ // 
CHECK22-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK22-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !21 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !21 +// CHECK22-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP9]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !21 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: 
omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK22-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -23444,59 +23444,59 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK22-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK22-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK22-NEXT: [[TMP16:%.*]] = load 
i32, i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD6]], i32* [[CONV]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[CONV7:%.*]] = fpext float [[TMP17]] to double // CHECK22-NEXT: [[ADD8:%.*]] = fadd double [[CONV7]], 1.000000e+00 // CHECK22-NEXT: [[CONV9:%.*]] = fptrunc double [[ADD8]] to float -// CHECK22-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store float [[CONV9]], float* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ARRAYIDX10:%.*]] = getelementptr inbounds float, float* [[TMP2]], i64 3 -// CHECK22-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4 +// CHECK22-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[CONV11:%.*]] = fpext float [[TMP18]] to double // CHECK22-NEXT: [[ADD12:%.*]] = fadd double [[CONV11]], 1.000000e+00 // CHECK22-NEXT: [[CONV13:%.*]] = fptrunc double [[ADD12]] to float -// CHECK22-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4 +// CHECK22-NEXT: store float [[CONV13]], float* [[ARRAYIDX10]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i64 0, i64 1 // CHECK22-NEXT: [[ARRAYIDX15:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX14]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8 +// CHECK22-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK22-NEXT: 
[[ADD16:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK22-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8 +// CHECK22-NEXT: store double [[ADD16]], double* [[ARRAYIDX15]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[TMP20:%.*]] = mul nsw i64 1, [[TMP5]] // CHECK22-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[TMP6]], i64 [[TMP20]] // CHECK22-NEXT: [[ARRAYIDX18:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX17]], i64 3 -// CHECK22-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8 +// CHECK22-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[ADD19:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK22-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8 +// CHECK22-NEXT: store double [[ADD19]], double* [[ARRAYIDX18]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK22-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8 +// CHECK22-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[ADD20:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK22-NEXT: store i64 [[ADD20]], i64* [[X]], align 8 +// CHECK22-NEXT: store i64 [[ADD20]], i64* [[X]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK22-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8 +// CHECK22-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 8, !llvm.access.group !24 // CHECK22-NEXT: [[CONV21:%.*]] = sext i8 [[TMP23]] to i32 // CHECK22-NEXT: [[ADD22:%.*]] = add nsw i32 [[CONV21]], 1 // CHECK22-NEXT: [[CONV23:%.*]] = trunc i32 [[ADD22]] to i8 -// CHECK22-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8 +// CHECK22-NEXT: store i8 [[CONV23]], i8* [[Y]], align 8, !llvm.access.group !24 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: 
omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK22-NEXT: [[ADD24:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK22-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD24]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -23630,42 +23630,42 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD10:%.*]] = add i32 [[TMP17]], 1 // CHECK22-NEXT: [[CMP11:%.*]] = icmp ult i32 [[TMP16]], [[ADD10]] // CHECK22-NEXT: br i1 [[CMP11]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK22-NEXT: [[ADD12:%.*]] = add i32 [[TMP18]], [[MUL]] 
-// CHECK22-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4 -// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD12]], i32* [[I8]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !27 // CHECK22-NEXT: [[ADD13:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8 -// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8 +// CHECK22-NEXT: store i32 [[ADD13]], i32* [[CONV]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV2]], align 8, !llvm.access.group !27 // CHECK22-NEXT: [[CONV14:%.*]] = sext i16 [[TMP21]] to i32 // CHECK22-NEXT: [[ADD15:%.*]] = add nsw i32 [[CONV14]], 1 // CHECK22-NEXT: [[CONV16:%.*]] = trunc i32 [[ADD15]] to i16 -// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8 -// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8 +// CHECK22-NEXT: store i16 [[CONV16]], i16* [[CONV2]], align 8, !llvm.access.group !27 +// CHECK22-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK22-NEXT: [[CONV17:%.*]] = sext i8 [[TMP22]] to i32 // CHECK22-NEXT: [[ADD18:%.*]] = add nsw i32 [[CONV17]], 1 // CHECK22-NEXT: [[CONV19:%.*]] = trunc i32 [[ADD18]] to i8 -// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8 +// CHECK22-NEXT: store i8 [[CONV19]], i8* [[CONV3]], align 8, !llvm.access.group !27 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD20:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK22-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store i32 [[ADD20]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK22-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK22-NEXT: [[ADD21:%.*]] = add i32 [[TMP24]], 1 -// CHECK22-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD21]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -23788,37 +23788,37 @@ int bar(int n){ // CHECK22: omp_if.then: // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !30 // CHECK22-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK22-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP13:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !30 +// CHECK22-NEXT: [[TMP13:%.*]] = load i32, 
i32* [[CONV]], align 8, !llvm.access.group !30 // CHECK22-NEXT: [[CONV5:%.*]] = sitofp i32 [[TMP13]] to double // CHECK22-NEXT: [[ADD6:%.*]] = fadd double [[CONV5]], 1.500000e+00 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK22-NEXT: store double [[ADD6]], double* [[A]], align 8 +// CHECK22-NEXT: store double [[ADD6]], double* [[A]], align 8, !llvm.access.group !30 // CHECK22-NEXT: [[A7:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK22-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8 +// CHECK22-NEXT: [[TMP14:%.*]] = load double, double* [[A7]], align 8, !llvm.access.group !30 // CHECK22-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK22-NEXT: store double [[INC]], double* [[A7]], align 8 +// CHECK22-NEXT: store double [[INC]], double* [[A7]], align 8, !llvm.access.group !30 // CHECK22-NEXT: [[CONV8:%.*]] = fptosi double [[INC]] to i16 // CHECK22-NEXT: [[TMP15:%.*]] = mul nsw i64 1, [[TMP2]] // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i64 [[TMP15]] // CHECK22-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i64 1 -// CHECK22-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2 +// CHECK22-NEXT: store i16 [[CONV8]], i16* [[ARRAYIDX9]], align 2, !llvm.access.group !30 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 // CHECK22-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK22-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !30 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_IF_END:%.*]] // CHECK22: omp_if.else: @@ -23854,7 +23854,7 @@ int bar(int n){ // CHECK22-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: [[ADD26:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK22-NEXT: store i32 [[ADD26]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND11]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK22: omp.inner.for.end27: // CHECK22-NEXT: br label [[OMP_IF_END]] // CHECK22: omp_if.end: @@ -23942,35 +23942,35 @@ int bar(int n){ // CHECK22-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK22-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !35 // 
CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8 -// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[CONV]], align 8, !llvm.access.group !35 +// CHECK22-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV1]], align 8, !llvm.access.group !35 // CHECK22-NEXT: [[CONV4:%.*]] = sext i16 [[TMP10]] to i32 // CHECK22-NEXT: [[ADD5:%.*]] = add nsw i32 [[CONV4]], 1 // CHECK22-NEXT: [[CONV6:%.*]] = trunc i32 [[ADD5]] to i16 -// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8 +// CHECK22-NEXT: store i16 [[CONV6]], i16* [[CONV1]], align 8, !llvm.access.group !35 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 2 -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK22-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store i32 [[ADD7]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !35 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 // CHECK22-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK22-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK22: omp.loop.exit: @@ -24047,23 
+24047,23 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24132,28 +24132,28 @@ int bar(int n){ // CHECK23-NEXT: store i32 
[[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK23-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK23-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK23-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// 
CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24230,31 +24230,31 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, 
!llvm.access.group !22 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24370,59 +24370,59 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK23-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP15:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK23-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK23-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK23-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK23-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK23-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK23-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK23-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ARRAYIDX13:%.*]] = 
getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK23-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK23-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK23-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK23-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK23-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK23-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK23-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK23-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK23-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK23-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK23-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK23-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK23-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK23-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK23-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK23-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, 
!llvm.access.group !25 // CHECK23-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK23-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK23-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK23-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK23-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK23-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24550,42 +24550,42 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK23-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK23-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* 
[[DOTCAPTURE_EXPR_]], align 4 -// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK23-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK23-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK23-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK23-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK23-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK23-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK23-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK23-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK23-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, 
i32 2 -// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK23-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK23-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK23-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24705,37 +24705,37 @@ int bar(int n){ // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK23-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK23-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: store double [[ADD5]], double* [[A]], align 4 +// CHECK23-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4 +// CHECK23-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK23-NEXT: store double [[INC]], double* [[A6]], align 4 +// CHECK23-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK23-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // CHECK23-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK23-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK23-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !31 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: 
[[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK23-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK23-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: @@ -24771,7 +24771,7 @@ int bar(int n){ // CHECK23-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK23-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK23: omp.inner.for.end26: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -24856,35 +24856,35 @@ int bar(int n){ // CHECK23-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !36 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK23-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK23-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK23-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK23-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK23-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// 
CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK23: omp.inner.for.end: // CHECK23-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK23: omp.loop.exit: @@ -24961,23 +24961,23 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK24-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], 
align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -25046,28 +25046,28 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: [[TMP8:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK24-NEXT: [[CONV2:%.*]] = sext i16 [[TMP8]] to i32 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[CONV2]], 1 // CHECK24-NEXT: [[CONV4:%.*]] = trunc i32 [[ADD3]] to i16 -// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4 +// CHECK24-NEXT: store i16 [[CONV4]], i16* [[CONV]], align 4, !llvm.access.group !19 // CHECK24-NEXT: br 
label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK24-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK24-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -25144,31 +25144,31 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP4]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP5:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP5]], [[TMP6]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP7]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// 
CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP8]], 1 -// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: [[TMP9:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP9]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !22 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK24-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -25284,59 +25284,59 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !25 +// CHECK24-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK24-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK24-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD6]], i32* [[A_ADDR]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x float], [10 x float]* [[TMP0]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: [[TMP17:%.*]] = load float, float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[CONV:%.*]] = fpext float [[TMP17]] to double // CHECK24-NEXT: [[ADD7:%.*]] = fadd double [[CONV]], 1.000000e+00 // CHECK24-NEXT: [[CONV8:%.*]] = fptrunc double [[ADD7]] to float -// CHECK24-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store float [[CONV8]], float* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[ARRAYIDX9:%.*]] = getelementptr inbounds float, float* [[TMP2]], i32 3 -// CHECK24-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4 +// CHECK24-NEXT: [[TMP18:%.*]] = load float, float* [[ARRAYIDX9]], align 4, 
!llvm.access.group !25 // CHECK24-NEXT: [[CONV10:%.*]] = fpext float [[TMP18]] to double // CHECK24-NEXT: [[ADD11:%.*]] = fadd double [[CONV10]], 1.000000e+00 // CHECK24-NEXT: [[CONV12:%.*]] = fptrunc double [[ADD11]] to float -// CHECK24-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4 +// CHECK24-NEXT: store float [[CONV12]], float* [[ARRAYIDX9]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[ARRAYIDX13:%.*]] = getelementptr inbounds [5 x [10 x double]], [5 x [10 x double]]* [[TMP3]], i32 0, i32 1 // CHECK24-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds [10 x double], [10 x double]* [[ARRAYIDX13]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8 +// CHECK24-NEXT: [[TMP19:%.*]] = load double, double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK24-NEXT: [[ADD15:%.*]] = fadd double [[TMP19]], 1.000000e+00 -// CHECK24-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8 +// CHECK24-NEXT: store double [[ADD15]], double* [[ARRAYIDX14]], align 8, !llvm.access.group !25 // CHECK24-NEXT: [[TMP20:%.*]] = mul nsw i32 1, [[TMP5]] // CHECK24-NEXT: [[ARRAYIDX16:%.*]] = getelementptr inbounds double, double* [[TMP6]], i32 [[TMP20]] // CHECK24-NEXT: [[ARRAYIDX17:%.*]] = getelementptr inbounds double, double* [[ARRAYIDX16]], i32 3 -// CHECK24-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8 +// CHECK24-NEXT: [[TMP21:%.*]] = load double, double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK24-NEXT: [[ADD18:%.*]] = fadd double [[TMP21]], 1.000000e+00 -// CHECK24-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8 +// CHECK24-NEXT: store double [[ADD18]], double* [[ARRAYIDX17]], align 8, !llvm.access.group !25 // CHECK24-NEXT: [[X:%.*]] = getelementptr inbounds [[STRUCT_TT:%.*]], %struct.TT* [[TMP7]], i32 0, i32 0 -// CHECK24-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4 +// CHECK24-NEXT: [[TMP22:%.*]] = load i64, i64* [[X]], align 4, !llvm.access.group 
!25 // CHECK24-NEXT: [[ADD19:%.*]] = add nsw i64 [[TMP22]], 1 -// CHECK24-NEXT: store i64 [[ADD19]], i64* [[X]], align 4 +// CHECK24-NEXT: store i64 [[ADD19]], i64* [[X]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[Y:%.*]] = getelementptr inbounds [[STRUCT_TT]], %struct.TT* [[TMP7]], i32 0, i32 1 -// CHECK24-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4 +// CHECK24-NEXT: [[TMP23:%.*]] = load i8, i8* [[Y]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[CONV20:%.*]] = sext i8 [[TMP23]] to i32 // CHECK24-NEXT: [[ADD21:%.*]] = add nsw i32 [[CONV20]], 1 // CHECK24-NEXT: [[CONV22:%.*]] = trunc i32 [[ADD21]] to i8 -// CHECK24-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4 +// CHECK24-NEXT: store i8 [[CONV22]], i8* [[Y]], align 4, !llvm.access.group !25 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK24-NEXT: [[ADD23:%.*]] = add nsw i32 [[TMP24]], 1 -// CHECK24-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD23]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -25464,42 +25464,42 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !28 +// CHECK24-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD8:%.*]] = add i32 [[TMP17]], 1 // CHECK24-NEXT: [[CMP9:%.*]] = icmp ult i32 [[TMP16]], [[ADD8]] // CHECK24-NEXT: br i1 [[CMP9]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4 -// CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[MUL:%.*]] = mul i32 [[TMP19]], 1 // CHECK24-NEXT: [[ADD10:%.*]] = add i32 [[TMP18]], [[MUL]] -// CHECK24-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4 -// CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD10]], i32* [[I6]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP20:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD11:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK24-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4 -// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD11]], i32* [[A_ADDR]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP21:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[CONV12:%.*]] = sext i16 [[TMP21]] to i32 // CHECK24-NEXT: [[ADD13:%.*]] = add nsw i32 [[CONV12]], 1 // CHECK24-NEXT: [[CONV14:%.*]] = trunc i32 [[ADD13]] to i16 -// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4 -// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4 +// CHECK24-NEXT: store i16 [[CONV14]], i16* [[CONV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: [[TMP22:%.*]] = load i8, i8* [[CONV1]], align 4, !llvm.access.group !28 // 
CHECK24-NEXT: [[CONV15:%.*]] = sext i8 [[TMP22]] to i32 // CHECK24-NEXT: [[ADD16:%.*]] = add nsw i32 [[CONV15]], 1 // CHECK24-NEXT: [[CONV17:%.*]] = trunc i32 [[ADD16]] to i8 -// CHECK24-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4 +// CHECK24-NEXT: store i8 [[CONV17]], i8* [[CONV1]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD18:%.*]] = add nsw i32 [[TMP23]], 1 -// CHECK24-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store i32 [[ADD18]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK24-NEXT: [[ADD19:%.*]] = add i32 [[TMP24]], 1 -// CHECK24-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD19]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: @@ -25619,37 +25619,37 @@ int bar(int n){ // CHECK24: omp_if.then: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !31 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[CMP3:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK24-NEXT: br i1 [[CMP3]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !31 +// CHECK24-NEXT: [[TMP13:%.*]] = load i32, i32* [[B_ADDR]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[CONV4:%.*]] = sitofp i32 [[TMP13]] to double // CHECK24-NEXT: [[ADD5:%.*]] = fadd double [[CONV4]], 1.500000e+00 // CHECK24-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK24-NEXT: store double [[ADD5]], double* [[A]], align 4 +// CHECK24-NEXT: store double [[ADD5]], double* [[A]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[A6:%.*]] = getelementptr inbounds [[STRUCT_S1]], %struct.S1* [[TMP0]], i32 0, i32 0 -// CHECK24-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4 +// CHECK24-NEXT: [[TMP14:%.*]] = load double, double* [[A6]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[INC:%.*]] = fadd double [[TMP14]], 1.000000e+00 -// CHECK24-NEXT: store double [[INC]], double* [[A6]], align 4 +// CHECK24-NEXT: store double [[INC]], double* [[A6]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[CONV7:%.*]] = fptosi double [[INC]] to i16 // CHECK24-NEXT: [[TMP15:%.*]] = mul nsw i32 1, [[TMP2]] // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, i16* [[TMP3]], i32 [[TMP15]] // 
CHECK24-NEXT: [[ARRAYIDX8:%.*]] = getelementptr inbounds i16, i16* [[ARRAYIDX]], i32 1 -// CHECK24-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2 +// CHECK24-NEXT: store i16 [[CONV7]], i16* [[ARRAYIDX8]], align 2, !llvm.access.group !31 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK24-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK24-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_IF_END:%.*]] // CHECK24: omp_if.else: @@ -25685,7 +25685,7 @@ int bar(int n){ // CHECK24-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: [[ADD25:%.*]] = add nsw i32 [[TMP23]], 1 // CHECK24-NEXT: store i32 [[ADD25]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND10]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK24: omp.inner.for.end26: // CHECK24-NEXT: br label [[OMP_IF_END]] // CHECK24: omp_if.end: @@ -25770,35 +25770,35 @@ int bar(int n){ // CHECK24-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// 
CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[A_ADDR]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP9]], 1 -// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4 -// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4 +// CHECK24-NEXT: store i32 [[ADD2]], i32* [[A_ADDR]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: [[TMP10:%.*]] = load i16, i16* [[CONV]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[CONV3:%.*]] = sext i16 [[TMP10]] to i32 // CHECK24-NEXT: [[ADD4:%.*]] = add nsw i32 [[CONV3]], 1 // CHECK24-NEXT: [[CONV5:%.*]] = trunc i32 [[ADD4]] to i16 -// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4 +// CHECK24-NEXT: store i16 [[CONV5]], i16* [[CONV]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 2 -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK24-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store i32 [[ADD6]], i32* [[ARRAYIDX]], 
align 4, !llvm.access.group !36 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 // CHECK24-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK24: omp.loop.exit: diff --git a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp index 691ccb2703df2..c3c14b6d1e18c 100644 --- a/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/target_teams_distribute_simd_dist_schedule_codegen.cpp @@ -260,28 +260,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 
+// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -345,28 +345,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -437,28 +437,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, 
!llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -628,28 +628,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !6 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -713,28 +713,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -805,28 +805,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -996,27 +996,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x 
i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1080,27 +1080,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store 
i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1171,27 +1171,27 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP2:%.*]] = icmp 
sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // 
CHECK3: omp.dispatch.inc: @@ -1361,27 +1361,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK4-NEXT: 
[[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1445,27 +1445,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], 
align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1536,27 +1536,27 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, 
!llvm.access.group !11 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -2404,27 +2404,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, 
i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2532,27 +2532,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP12]], i32* 
[[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2679,27 +2679,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK9-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // 
CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -2857,27 +2857,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 
[[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2941,27 +2941,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: 
[[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3032,27 +3032,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !18 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -3406,27 +3406,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: 
omp.inner.for.inc: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3534,27 +3534,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK10-NEXT: 
[[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3681,27 +3681,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK10-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I5]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[I5]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3859,27 +3859,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] 
= load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3943,27 +3943,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -4034,27 +4034,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 
[[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -4403,26 +4403,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4527,26 +4527,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4667,26 +4667,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // 
CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK11-NEXT: 
store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4844,26 +4844,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // 
CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4927,26 +4927,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 
4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -5017,26 +5017,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -5385,26 +5385,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: 
omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5509,26 +5509,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP12]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP16]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK12-NEXT: 
[[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5649,26 +5649,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP1]], i32 [[TMP19]] -// CHECK12-NEXT: store i32 0, i32* 
[[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5826,26 +5826,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5909,26 +5909,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 
[[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5999,26 +5999,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: diff --git a/clang/test/OpenMP/task_codegen.cpp b/clang/test/OpenMP/task_codegen.cpp index de2e1a9d6900f..26ab2add1967a 100644 --- a/clang/test/OpenMP/task_codegen.cpp +++ b/clang/test/OpenMP/task_codegen.cpp @@ -482,17 +482,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: store i32 15, i32* @a, align 4 // CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 // CHECK1-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 @@ -528,17 +528,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store void (i8*, ...)* 
null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK1-NEXT: store i32 15, i32* @a, align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 @@ -570,42 +570,42 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: store 
%struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK1-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK1-NEXT: ] // CHECK1: .untied.done..i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK1-NEXT: br label [[CLEANUP_I:%.*]] // CHECK1: .untied.jmp..i: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK1: .untied.jmp.1.i: -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK1-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // 
CHECK1-NEXT: store i32 1, i32* @a, align 4 // CHECK1-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK1: .omp_outlined..3.exit: // CHECK1-NEXT: ret i32 0 @@ -633,39 +633,39 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, 
%struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK1-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK1-NEXT: ] // CHECK1: .untied.done..i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK1-NEXT: br label [[CLEANUP_I:%.*]] // CHECK1: .untied.jmp..i: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** 
[[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK1-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK1: .untied.jmp.1.i: // CHECK1-NEXT: store i32 1, i32* @a, align 4 -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]] // CHECK1: .omp_outlined..5.exit: // CHECK1-NEXT: ret i32 0 @@ -693,39 +693,39 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !51 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK1-NEXT: switch i32 [[TMP12]], label 
[[DOTUNTIED_DONE__I:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK1-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK1-NEXT: ] // CHECK1: .untied.done..i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[CLEANUP_I:%.*]] // CHECK1: .untied.jmp..i: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK1-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 // CHECK1-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK1: .untied.jmp.1.i: // CHECK1-NEXT: store i32 1, i32* @a, align 4 -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT]] // CHECK1: .omp_outlined..7.exit: // CHECK1-NEXT: ret i32 0 @@ -752,17 +752,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] 
to %struct.anon.8* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 
8, !noalias !62 +// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK1-NEXT: store i32 2, i32* @a, align 4 // CHECK1-NEXT: ret i32 0 // @@ -788,17 +788,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !71 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !71 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !71 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !71 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !71 -// CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !72 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !72 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !72 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !72 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 +// CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK1-NEXT: store i32 2, i32* @a, align 4 // CHECK1-NEXT: ret i32 0 // @@ -824,17 +824,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !81 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !81 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !81 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !81 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !81 -// CHECK1-NEXT: store %struct.anon.12* [[TMP8]], 
%struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !82 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !82 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !82 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !82 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 +// CHECK1-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK1-NEXT: store i32 3, i32* @a, align 4 // CHECK1-NEXT: ret i32 0 // @@ -860,17 +860,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK1-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !91 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !91 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !91 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !91 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !91 -// CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !92 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !92 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !92 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !92 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 +// CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK1-NEXT: store i32 4, i32* @a, align 4 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 @@ -916,22 +916,22 @@ void 
xxxx() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.18* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !101 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !102 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK1-NEXT: store i32 4, i32* [[TMP16]], align 128 // CHECK1-NEXT: store i32 4, i32* @a, align 4 // CHECK1-NEXT: ret i32 0 @@ -999,26 +999,26 @@ void xxxx() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19]], %struct.kmp_task_t_with_privates.19* [[TMP3]], i32 0, i32 2 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.20* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S***)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META103:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META106:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S***)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK1-NEXT: [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 // CHECK1-NEXT: switch i32 [[TMP21]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK1-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] @@ -1029,32 +1029,32 @@ void xxxx() { // CHECK1-NEXT: i32 5, label [[DOTUNTIED_JMP_10_I:%.*]] // CHECK1-NEXT: ] // CHECK1: .untied.done..i: -// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, 
!noalias !111 +// CHECK1-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK1-NEXT: br label [[CLEANUP_I:%.*]] // CHECK1: .untied.jmp..i: -// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: store i32 1, i32* [[TMP22]], align 4 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT:%.*]] // CHECK1: .untied.jmp.2.i: // CHECK1-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP17]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR4]] // CHECK1-NEXT: [[DOTS2__ADDR_I:%.*]] = bitcast i8* [[DOTS2__VOID_ADDR_I]] to %struct.S* // CHECK1-NEXT: store %struct.S* [[DOTS2__ADDR_I]], %struct.S** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: store i32 2, i32* [[TMP27]], align 4 -// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: [[TMP29:%.*]] = load 
i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: [[TMP29:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK1: .untied.jmp.3.i: // CHECK1-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP19]]) #[[ATTR4]] // CHECK1-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0 // CHECK1-NEXT: store i32 0, i32* [[A_I]], align 4 -// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]] // CHECK1-NEXT: [[TMP33:%.*]] = bitcast i8* [[TMP32]] to %struct.kmp_task_t_with_privates.18* // CHECK1-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 0 @@ -1062,21 +1062,21 @@ void xxxx() { // CHECK1-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP35]], i32 0, i32 0 // CHECK1-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP16]], align 128 // CHECK1-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 128 -// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: store i32 3, i32* [[TMP40]], align 4 -// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK1: .untied.jmp.5.i: -// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR4]] -// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: store i32 4, i32* [[TMP46]], align 4 -// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK1-NEXT: br label 
[[DOTOMP_OUTLINED__17_EXIT]] // CHECK1: .untied.jmp.7.i: @@ -1087,24 +1087,24 @@ void xxxx() { // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK1-NEXT: [[A9_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0 // CHECK1-NEXT: store i32 10, i32* [[A9_I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: store i32 5, i32* [[TMP54]], align 4 -// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK1-NEXT: [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK1-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK1-NEXT: [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK1-NEXT: [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR4]] // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK1: .untied.jmp.10.i: // CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP19]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK1-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP19]] to i8* // CHECK1-NEXT: call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR4]] // 
CHECK1-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP17]]) #[[ATTR4]] -// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK1-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK1-NEXT: br label [[CLEANUP_I]] // CHECK1: cleanup.i: -// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK1-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK1-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK1: .omp_outlined..17.exit: // CHECK1-NEXT: ret i32 0 @@ -1199,17 +1199,17 @@ void xxxx() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.21* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.22* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META115:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !121 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !121 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !121 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !121 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !121 -// CHECK1-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 
+// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !122 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !122 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !122 +// CHECK1-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load %struct.S1*, %struct.S1** [[TMP11]], align 8 // CHECK1-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP12]], i32 0, i32 0 @@ -1309,22 +1309,22 @@ void xxxx() { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.25* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.24* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META122:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META125:![0-9]+]]) -// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META127:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META129:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !131 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !132 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, 
...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !131 +// CHECK1-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !132 // CHECK1-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 // CHECK1-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP18]], align 8 @@ -1674,17 +1674,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// 
CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: store i32 15, i32* @a, align 4 // CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 // CHECK2-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 @@ -1720,17 +1720,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK2-NEXT: 
[[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: 
store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK2-NEXT: store i32 15, i32* @a, align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 @@ -1762,42 +1762,42 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// 
CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK2-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK2-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK2-NEXT: ] // CHECK2: .untied.done..i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK2-NEXT: br label [[CLEANUP_I:%.*]] // CHECK2: .untied.jmp..i: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK2-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK2: .untied.jmp.1.i: -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 // CHECK2-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK2-NEXT: store i32 1, i32* @a, align 4 // CHECK2-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[TMP17]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK2: .omp_outlined..3.exit: // CHECK2-NEXT: ret i32 0 @@ -1825,39 +1825,39 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !42 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !42 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 +// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, 
!noalias !42 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK2-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK2-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK2-NEXT: ] // CHECK2: .untied.done..i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK2-NEXT: br label [[CLEANUP_I:%.*]] // CHECK2: .untied.jmp..i: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK2-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK2: .untied.jmp.1.i: // CHECK2-NEXT: store i32 1, i32* @a, align 4 -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK2-NEXT: br label 
[[DOTOMP_OUTLINED__5_EXIT]] // CHECK2: .omp_outlined..5.exit: // CHECK2-NEXT: ret i32 0 @@ -1885,39 +1885,39 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK2-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK2-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK2-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK2-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK2-NEXT: ] // CHECK2: .untied.done..i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[CLEANUP_I:%.*]] // CHECK2: .untied.jmp..i: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK2-NEXT: store i32 1, i32* [[TMP13]], align 4 -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK2-NEXT: [[TMP15:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 // CHECK2-NEXT: [[TMP16:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP14]], i8* [[TMP15]]) #[[ATTR4]] // CHECK2-NEXT: br label 
[[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK2: .untied.jmp.1.i: // CHECK2-NEXT: store i32 1, i32* @a, align 4 -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT]] // CHECK2: .omp_outlined..7.exit: // CHECK2-NEXT: ret i32 0 @@ -1944,17 +1944,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 +// CHECK2-NEXT: 
call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK2-NEXT: store i32 2, i32* @a, align 4 // CHECK2-NEXT: ret i32 0 // @@ -1980,17 +1980,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !71 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !71 -// CHECK2-NEXT: store i8* null, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !71 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !71 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !71 -// CHECK2-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !72 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !72 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !72 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !72 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 +// CHECK2-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK2-NEXT: store i32 2, i32* @a, align 4 // CHECK2-NEXT: ret i32 0 // @@ -2016,17 +2016,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) -// CHECK2-NEXT: call 
void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !81 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !81 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !81 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !81 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !81 -// CHECK2-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !82 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !82 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !82 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !82 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 +// CHECK2-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], 
align 8, !noalias !82 // CHECK2-NEXT: store i32 3, i32* @a, align 4 // CHECK2-NEXT: ret i32 0 // @@ -2052,17 +2052,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !91 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !91 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !91 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !91 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !91 -// CHECK2-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !92 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !92 +// 
CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !92 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !92 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 +// CHECK2-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK2-NEXT: store i32 4, i32* @a, align 4 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 @@ -2108,22 +2108,22 @@ void xxxx() { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP3]], i32 0, i32 2 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.18* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !101 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !102 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK2-NEXT: store i32 4, i32* [[TMP16]], align 128 // CHECK2-NEXT: store i32 4, i32* @a, align 4 // CHECK2-NEXT: ret i32 0 @@ -2191,26 +2191,26 @@ void xxxx() { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19]], %struct.kmp_task_t_with_privates.19* [[TMP3]], i32 0, i32 2 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.20* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S***)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META103:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META106:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S***)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK2-NEXT: [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK2-NEXT: [[TMP18:%.*]] = load %struct.S**, %struct.S*** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP19:%.*]] = load %struct.S*, %struct.S** [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP21:%.*]] = load i32, i32* [[TMP20]], align 4 // CHECK2-NEXT: switch i32 [[TMP21]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK2-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] @@ -2221,32 +2221,32 @@ void xxxx() { // CHECK2-NEXT: i32 5, label [[DOTUNTIED_JMP_10_I:%.*]] // CHECK2-NEXT: ] // CHECK2: .untied.done..i: -// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, 
!noalias !111 +// CHECK2-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK2-NEXT: br label [[CLEANUP_I:%.*]] // CHECK2: .untied.jmp..i: -// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP22:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: store i32 1, i32* [[TMP22]], align 4 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: [[TMP24:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP25:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP23]], i8* [[TMP24]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT:%.*]] // CHECK2: .untied.jmp.2.i: // CHECK2-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP17]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[DOTS2__VOID_ADDR_I:%.*]] = call i8* @__kmpc_alloc(i32 [[TMP26]], i64 4, i8* inttoptr (i64 7 to i8*)) #[[ATTR4]] // CHECK2-NEXT: [[DOTS2__ADDR_I:%.*]] = bitcast i8* [[DOTS2__VOID_ADDR_I]] to %struct.S* // CHECK2-NEXT: store %struct.S* [[DOTS2__ADDR_I]], %struct.S** [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP27:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: store i32 2, i32* [[TMP27]], align 4 -// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: [[TMP29:%.*]] = load 
i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: [[TMP29:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP28]], i8* [[TMP29]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK2: .untied.jmp.3.i: // CHECK2-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP19]]) #[[ATTR4]] // CHECK2-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0 // CHECK2-NEXT: store i32 0, i32* [[A_I]], align 4 -// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[TMP32:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[TMP31]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]] // CHECK2-NEXT: [[TMP33:%.*]] = bitcast i8* [[TMP32]] to %struct.kmp_task_t_with_privates.18* // CHECK2-NEXT: [[TMP34:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18:%.*]], %struct.kmp_task_t_with_privates.18* [[TMP33]], i32 0, i32 0 @@ -2254,21 +2254,21 @@ void xxxx() { // CHECK2-NEXT: [[TMP36:%.*]] = getelementptr inbounds [[STRUCT__KMP_PRIVATES_T:%.*]], %struct..kmp_privates.t* [[TMP35]], i32 0, i32 0 // CHECK2-NEXT: [[TMP37:%.*]] = load i32, i32* [[TMP16]], align 128 // CHECK2-NEXT: store i32 [[TMP37]], i32* [[TMP36]], align 128 -// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP38:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[TMP39:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* 
@[[GLOB1]], i32 [[TMP38]], i8* [[TMP32]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP40:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: store i32 3, i32* [[TMP40]], align 4 -// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP41:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP41]], i8* [[TMP42]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK2: .untied.jmp.5.i: -// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP44:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[TMP45:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[TMP44]], i32 0) #[[ATTR4]] -// CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: store i32 4, i32* [[TMP46]], align 4 -// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP47:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: [[TMP48:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP49:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP47]], i8* [[TMP48]]) #[[ATTR4]] // CHECK2-NEXT: br label 
[[DOTOMP_OUTLINED__17_EXIT]] // CHECK2: .untied.jmp.7.i: @@ -2279,24 +2279,24 @@ void xxxx() { // CHECK2-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK2-NEXT: [[A9_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[TMP19]], i32 0, i32 0 // CHECK2-NEXT: store i32 10, i32* [[A9_I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[TMP53:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[TMP52]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP54:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: store i32 5, i32* [[TMP54]], align 4 -// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK2-NEXT: [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK2-NEXT: [[TMP55:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK2-NEXT: [[TMP56:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK2-NEXT: [[TMP57:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[TMP55]], i8* [[TMP56]]) #[[ATTR4]] // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK2: .untied.jmp.10.i: // CHECK2-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP19]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[TMP58:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 // CHECK2-NEXT: [[TMP59:%.*]] = bitcast %struct.S* [[TMP19]] to i8* // CHECK2-NEXT: call void @__kmpc_free(i32 [[TMP58]], i8* [[TMP59]], i8* inttoptr (i64 7 to i8*)) #[[ATTR4]] // 
CHECK2-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[TMP17]]) #[[ATTR4]] -// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK2-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK2-NEXT: br label [[CLEANUP_I]] // CHECK2: cleanup.i: -// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK2-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK2-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK2: .omp_outlined..17.exit: // CHECK2-NEXT: ret i32 0 @@ -2391,17 +2391,17 @@ void xxxx() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.21* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.22* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META115:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !121 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !121 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !121 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !121 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !121 -// CHECK2-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 
+// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !122 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !122 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !122 +// CHECK2-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load %struct.S1*, %struct.S1** [[TMP11]], align 8 // CHECK2-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP12]], i32 0, i32 0 @@ -2501,22 +2501,22 @@ void xxxx() { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_24]], %struct.kmp_task_t_with_privates.24* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.25* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.24* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META122:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META125:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META127:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META129:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !131 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !131 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !131 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META123:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META126:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META128:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META130:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !132 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.25*, double**)* @.omp_task_privates_map..26 to void (i8*, ...)*), void (i8*, 
...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: store %struct.anon.23* [[TMP8]], %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon.23*, %struct.anon.23** [[__CONTEXT_ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !132 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, double**)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], double** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK2-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !131 +// CHECK2-NEXT: [[TMP16:%.*]] = load double*, double** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !132 // CHECK2-NEXT: [[TMP17:%.*]] = load double, double* [[TMP16]], align 8 // CHECK2-NEXT: [[TMP18:%.*]] = getelementptr inbounds [[STRUCT_ANON_23:%.*]], %struct.anon.23* [[TMP12]], i32 0, i32 0 // CHECK2-NEXT: [[TMP19:%.*]] = load float*, float** [[TMP18]], align 8 @@ -2885,17 +2885,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// 
CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK3-NEXT: store i32 15, i32* @a, align 4 // CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 // CHECK3-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 @@ -2931,17 +2931,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK3-NEXT: 
[[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK3-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 +// CHECK3-NEXT: 
store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK3-NEXT: store i32 15, i32* @a, align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 @@ -2973,31 +2973,31 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// 
CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK3-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK3-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK3-NEXT: ] // CHECK3: .untied.done..i: -// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK3-NEXT: br label [[CLEANUP_I:%.*]] // CHECK3: .untied.jmp..i: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK3-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP14:%.*]] = load 
i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK3: .untied.jmp.1.i: @@ -3005,10 +3005,10 @@ void xxxx() { // CHECK3-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK3-NEXT: store i32 1, i32* @a, align 4 // CHECK3-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: -// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK3: .omp_outlined..3.exit: // CHECK3-NEXT: ret i32 0 @@ -3036,39 +3036,39 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], 
i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK3-NEXT: switch 
i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK3-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK3-NEXT: ] // CHECK3: .untied.done..i: -// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK3-NEXT: br label [[CLEANUP_I:%.*]] // CHECK3: .untied.jmp..i: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK3-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK3: .untied.jmp.1.i: // CHECK3-NEXT: store i32 1, i32* @a, align 4 -// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: -// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]] // CHECK3: .omp_outlined..5.exit: // CHECK3-NEXT: ret i32 0 @@ -3096,39 +3096,39 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // 
CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !52 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias 
!52 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK3-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK3-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK3-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK3-NEXT: ] // CHECK3: .untied.done..i: -// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK3-NEXT: br label [[CLEANUP_I:%.*]] // CHECK3: .untied.jmp..i: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK3-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 // CHECK3-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // CHECK3: .untied.jmp.1.i: // CHECK3-NEXT: store i32 1, i32* @a, align 4 -// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: 
-// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT]] // CHECK3: .omp_outlined..7.exit: // CHECK3-NEXT: ret i32 0 @@ -3155,17 +3155,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK3-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK3-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 +// CHECK3-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK3-NEXT: store i32 2, i32* @a, align 4 // CHECK3-NEXT: ret i32 0 // @@ -3191,17 +3191,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !71 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !71 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !71 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !71 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !71 -// CHECK3-NEXT: store %struct.anon.10* [[TMP8]], 
%struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !72 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !72 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !72 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !72 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 +// CHECK3-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK3-NEXT: store i32 2, i32* @a, align 4 // CHECK3-NEXT: ret i32 0 // @@ -3227,17 +3227,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK3-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !81 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !81 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !81 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !81 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !81 -// CHECK3-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !82 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !82 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !82 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !82 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 +// CHECK3-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 // CHECK3-NEXT: store i32 3, i32* @a, align 4 // CHECK3-NEXT: ret i32 0 // @@ -3263,17 +3263,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK3-NEXT: 
[[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !91 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !91 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !91 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !91 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !91 -// CHECK3-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !92 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !92 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !92 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !92 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 +// 
CHECK3-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK3-NEXT: store i32 4, i32* @a, align 4 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 @@ -3319,22 +3319,22 @@ void xxxx() { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP3]], i32 0, i32 2 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.18* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !101 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !102 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK3-NEXT: store i32 4, i32* [[TMP16]], align 128 // CHECK3-NEXT: store i32 4, i32* @a, align 4 // CHECK3-NEXT: ret i32 0 @@ -3404,25 +3404,25 @@ void xxxx() { // CHECK3-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19]], %struct.kmp_task_t_with_privates.19* [[TMP3]], i32 0, i32 2 // CHECK3-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.20* [[TMP9]] to i8* // CHECK3-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK3-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S**)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META103:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META106:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S**)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S**)* // CHECK3-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111 -// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !112 +// CHECK3-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 // CHECK3-NEXT: switch i32 [[TMP20]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK3-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] @@ -3432,20 +3432,20 @@ void xxxx() { // CHECK3-NEXT: i32 4, label [[DOTUNTIED_JMP_15_I:%.*]] // CHECK3-NEXT: ] // CHECK3: .untied.done..i: -// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK3-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, 
!noalias !112 // CHECK3-NEXT: br label [[CLEANUP_I:%.*]] // CHECK3: .untied.jmp..i: -// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: store i32 1, i32* [[TMP21]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT:%.*]] // CHECK3: .untied.jmp.2.i: // CHECK3-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S1_I]]) #[[ATTR4]] // CHECK3-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S2_I]]) #[[ATTR4]] // CHECK3-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 -// CHECK3-NEXT: store i32 0, i32* [[A_I]], align 4, !noalias !111 +// CHECK3-NEXT: store i32 0, i32* [[A_I]], align 4, !noalias !112 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]] // CHECK3-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to %struct.kmp_task_t_with_privates.18* @@ -3456,44 +3456,44 @@ void xxxx() { // CHECK3-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 128 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: store i32 2, i32* [[TMP31]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK3: .untied.jmp.6.i: // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR4]] -// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: store i32 3, i32* [[TMP35]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR4]] // CHECK3-NEXT: 
br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK3: .untied.jmp.10.i: // CHECK3-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[S1_I]] to i8* // CHECK3-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8* -// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !111 +// CHECK3-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !112 // CHECK3-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK3-NEXT: [[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 -// CHECK3-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !111 +// CHECK3-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !112 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK3-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: store i32 4, i32* [[TMP41]], align 4 // CHECK3-NEXT: [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK3-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK3-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR4]] // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK3: 
.untied.jmp.15.i: // CHECK3-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S2_I]]) #[[ATTR4]] // CHECK3-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S1_I]]) #[[ATTR4]] -// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK3-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK3-NEXT: br label [[CLEANUP_I]] // CHECK3: cleanup.i: -// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK3-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK3-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK3: .omp_outlined..17.exit: // CHECK3-NEXT: ret i32 0 @@ -3589,17 +3589,17 @@ void xxxx() { // CHECK3-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK3-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.21* // CHECK3-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.22* [[TMP3]] to i8* -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META115:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) -// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) -// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !121 -// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !121 -// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !121 -// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !121 -// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !121 -// CHECK3-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias 
!121 -// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK3-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) +// CHECK3-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 +// CHECK3-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 +// CHECK3-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !122 +// CHECK3-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !122 +// CHECK3-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !122 +// CHECK3-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 +// CHECK3-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 // CHECK3-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP10]], i32 0, i32 0 // CHECK3-NEXT: [[TMP12:%.*]] = load %struct.S1*, %struct.S1** [[TMP11]], align 8 // CHECK3-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP12]], i32 0, i32 0 @@ -3964,17 +3964,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK4-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK4-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK4-NEXT: store i32 15, i32* @a, align 4 // CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* @a, align 4 
// CHECK4-NEXT: [[CONV_I:%.*]] = trunc i32 [[TMP11]] to i8 @@ -4010,17 +4010,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !21 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !21 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !21 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !21 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !21 -// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !21 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META13:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META16:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !22 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !22 +// CHECK4-NEXT: store i8* null, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !22 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !22 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !22 +// CHECK4-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !22 // CHECK4-NEXT: store i32 15, i32* @a, align 4 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_0:%.*]], %struct.anon.0* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP12:%.*]] = load [2 x %struct.S]*, [2 x %struct.S]** [[TMP11]], align 8 @@ -4052,31 +4052,31 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !31 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, 
%struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !31 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META23:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META26:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !32 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !32 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK4-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK4-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK4-NEXT: ] // CHECK4: .untied.done..i: -// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK4-NEXT: br label [[CLEANUP_I:%.*]] // CHECK4: .untied.jmp..i: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** 
[[DOTPART_ID__ADDR_I]], align 8, !noalias !32 // CHECK4-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB7]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !31 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !32 // CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT:%.*]] // CHECK4: .untied.jmp.1.i: @@ -4084,10 +4084,10 @@ void xxxx() { // CHECK4-NEXT: call void @__kmpc_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] // CHECK4-NEXT: store i32 1, i32* @a, align 4 // CHECK4-NEXT: call void @__kmpc_end_critical(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM2_I]], [8 x i32]* @.gomp_critical_user_.var) #[[ATTR4]] -// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: -// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !31 +// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !32 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__3_EXIT]] // CHECK4: .omp_outlined..3.exit: // CHECK4-NEXT: ret i32 0 @@ -4115,39 +4115,39 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) -// CHECK4-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !41 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !41 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META33:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META36:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !42 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !42 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !42 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 +// CHECK4-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// 
CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !42 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK4-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK4-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK4-NEXT: ] // CHECK4: .untied.done..i: -// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK4-NEXT: br label [[CLEANUP_I:%.*]] // CHECK4: .untied.jmp..i: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !41 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !42 // CHECK4-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB9]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !41 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !42 // CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT:%.*]] // CHECK4: .untied.jmp.1.i: // CHECK4-NEXT: store i32 1, i32* @a, align 4 -// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !42 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: -// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !41 +// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias 
!42 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__5_EXIT]] // CHECK4: .omp_outlined..5.exit: // CHECK4-NEXT: ret i32 0 @@ -4175,39 +4175,39 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !51 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !51 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META43:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META46:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, 
!noalias !52 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !52 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[TMP11]], align 4 // CHECK4-NEXT: switch i32 [[TMP12]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] // CHECK4-NEXT: i32 1, label [[DOTUNTIED_JMP_1_I:%.*]] // CHECK4-NEXT: ] // CHECK4: .untied.done..i: -// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK4-NEXT: br label [[CLEANUP_I:%.*]] // CHECK4: .untied.jmp..i: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !51 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !52 // CHECK4-NEXT: store i32 1, i32* [[TMP13]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB11]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !51 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !52 // CHECK4-NEXT: [[TMP15:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP14]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT:%.*]] // 
CHECK4: .untied.jmp.1.i: // CHECK4-NEXT: store i32 1, i32* @a, align 4 -// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: -// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !51 +// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !52 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__7_EXIT]] // CHECK4: .omp_outlined..7.exit: // CHECK4-NEXT: ret i32 0 @@ -4234,17 +4234,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !61 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !61 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !61 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !61 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !61 -// CHECK4-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !61 +// CHECK4-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META53:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META56:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !62 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !62 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !62 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !62 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !62 +// CHECK4-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !62 // CHECK4-NEXT: store i32 2, i32* @a, align 4 // CHECK4-NEXT: ret i32 0 // @@ -4270,17 +4270,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !71 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !71 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], 
align 8, !noalias !71 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !71 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !71 -// CHECK4-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !71 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META63:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META66:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !72 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !72 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !72 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !72 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !72 +// CHECK4-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !72 // CHECK4-NEXT: store i32 2, i32* @a, align 4 // CHECK4-NEXT: ret i32 0 // @@ -4306,17 +4306,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) -// CHECK4-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !81 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !81 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !81 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !81 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !81 -// CHECK4-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !81 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META73:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META76:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !82 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !82 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !82 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !82 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !82 +// CHECK4-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !82 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], 
align 8, !noalias !82 // CHECK4-NEXT: store i32 3, i32* @a, align 4 // CHECK4-NEXT: ret i32 0 // @@ -4342,17 +4342,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !91 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !91 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !91 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !91 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !91 -// CHECK4-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !91 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META83:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META86:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !92 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !92 +// 
CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !92 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !92 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !92 +// CHECK4-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !92 // CHECK4-NEXT: store i32 4, i32* @a, align 4 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_14:%.*]], %struct.anon.14* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 @@ -4398,22 +4398,22 @@ void xxxx() { // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_18]], %struct.kmp_task_t_with_privates.18* [[TMP3]], i32 0, i32 2 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.18* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !101 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !101 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !101 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META93:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META96:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !102 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i32**)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: store %struct.anon.17* [[TMP8]], %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon.17*, %struct.anon.17** [[__CONTEXT_ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !102 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !102 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !101 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !102 // CHECK4-NEXT: store i32 4, i32* [[TMP16]], align 128 // CHECK4-NEXT: store i32 4, i32* @a, align 4 // CHECK4-NEXT: ret i32 0 @@ -4483,25 +4483,25 @@ void xxxx() { // CHECK4-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES_19]], %struct.kmp_task_t_with_privates.19* [[TMP3]], i32 0, i32 2 // CHECK4-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t.20* [[TMP9]] to i8* // CHECK4-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !111 -// CHECK4-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S**)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META103:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META106:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !112 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t.20*, i32**, %struct.S**, %struct.S**)* @.omp_task_privates_map..20 to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP12:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i32**, %struct.S**, %struct.S**)* // CHECK4-NEXT: call void [[TMP15]](i8* [[TMP14]], i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR_I]], %struct.S** [[DOTLOCAL_PTR_ADDR1_I]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !111 -// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32*, i32** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP17:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP18:%.*]] = load %struct.S*, %struct.S** [[DOTLOCAL_PTR_ADDR1_I]], align 8, !noalias !112 +// CHECK4-NEXT: [[TMP19:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP20:%.*]] = load i32, i32* [[TMP19]], align 4 // CHECK4-NEXT: switch i32 [[TMP20]], label [[DOTUNTIED_DONE__I:%.*]] [ // CHECK4-NEXT: i32 0, label [[DOTUNTIED_JMP__I:%.*]] @@ -4511,20 +4511,20 @@ void xxxx() { // CHECK4-NEXT: i32 4, label [[DOTUNTIED_JMP_15_I:%.*]] // CHECK4-NEXT: ] // CHECK4: .untied.done..i: -// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK4-NEXT: store i32 1, i32* [[CLEANUP_DEST_SLOT_I]], align 4, 
!noalias !112 // CHECK4-NEXT: br label [[CLEANUP_I:%.*]] // CHECK4: .untied.jmp..i: -// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP21:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: store i32 1, i32* [[TMP21]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP22:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP23:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM_I]], i8* [[TMP22]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT:%.*]] // CHECK4: .untied.jmp.2.i: // CHECK4-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S1_I]]) #[[ATTR4]] // CHECK4-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S2_I]]) #[[ATTR4]] // CHECK4-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 -// CHECK4-NEXT: store i32 0, i32* [[A_I]], align 4, !noalias !111 +// CHECK4-NEXT: store i32 0, i32* [[A_I]], align 4, !noalias !112 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM3_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23:[0-9]+]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP24:%.*]] = call i8* @__kmpc_omp_task_alloc(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3_I]], i32 1, i64 256, i64 1, i32 (i32, i8*)* bitcast (i32 (i32, %struct.kmp_task_t_with_privates.18*)* @.omp_task_entry..19 to i32 (i32, i8*)*)) #[[ATTR4]] // CHECK4-NEXT: [[TMP25:%.*]] = bitcast i8* [[TMP24]] to %struct.kmp_task_t_with_privates.18* @@ -4535,44 +4535,44 @@ void xxxx() { // CHECK4-NEXT: store i32 [[TMP29]], i32* [[TMP28]], align 128 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM4_I:%.*]] = call i32 
@__kmpc_global_thread_num(%struct.ident_t* @[[GLOB23]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP30:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM4_I]], i8* [[TMP24]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP31:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: store i32 2, i32* [[TMP31]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM5_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP32:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP33:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM5_I]], i8* [[TMP32]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK4: .untied.jmp.6.i: // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM8_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP34:%.*]] = call i32 @__kmpc_omp_taskyield(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM8_I]], i32 0) #[[ATTR4]] -// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP35:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: store i32 3, i32* [[TMP35]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM9_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP36:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP37:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM9_I]], i8* [[TMP36]]) #[[ATTR4]] // CHECK4-NEXT: 
br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK4: .untied.jmp.10.i: // CHECK4-NEXT: call void @_ZN1SC1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP38:%.*]] = bitcast %struct.S* [[S1_I]] to i8* // CHECK4-NEXT: [[TMP39:%.*]] = bitcast %struct.S* [[REF_TMP_I]] to i8* -// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !111 +// CHECK4-NEXT: call void @llvm.memcpy.p0i8.p0i8.i64(i8* align 4 [[TMP38]], i8* align 4 [[TMP39]], i64 4, i1 false) #[[ATTR4]], !noalias !112 // CHECK4-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[REF_TMP_I]]) #[[ATTR4]] // CHECK4-NEXT: [[A12_I:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S2_I]], i32 0, i32 0 -// CHECK4-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !111 +// CHECK4-NEXT: store i32 10, i32* [[A12_I]], align 4, !noalias !112 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM13_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]]) #[[ATTR4]] // CHECK4-NEXT: [[TMP40:%.*]] = call i32 @__kmpc_omp_taskwait(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM13_I]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP41:%.*]] = load i32*, i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: store i32 4, i32* [[TMP41]], align 4 // CHECK4-NEXT: [[OMP_GLOBAL_THREAD_NUM14_I:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB21]]) #[[ATTR4]] -// CHECK4-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !111 +// CHECK4-NEXT: [[TMP42:%.*]] = load i8*, i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !112 // CHECK4-NEXT: [[TMP43:%.*]] = call i32 @__kmpc_omp_task(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM14_I]], i8* [[TMP42]]) #[[ATTR4]] // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK4: 
.untied.jmp.15.i: // CHECK4-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S2_I]]) #[[ATTR4]] // CHECK4-NEXT: call void @_ZN1SD1Ev(%struct.S* nonnull align 4 dereferenceable(4) [[S1_I]]) #[[ATTR4]] -// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK4-NEXT: store i32 0, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK4-NEXT: br label [[CLEANUP_I]] // CHECK4: cleanup.i: -// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !111 +// CHECK4-NEXT: [[CLEANUP_DEST_I:%.*]] = load i32, i32* [[CLEANUP_DEST_SLOT_I]], align 4, !noalias !112 // CHECK4-NEXT: br label [[DOTOMP_OUTLINED__17_EXIT]] // CHECK4: .omp_outlined..17.exit: // CHECK4-NEXT: ret i32 0 @@ -4668,17 +4668,17 @@ void xxxx() { // CHECK4-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK4-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.21* // CHECK4-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.22* [[TMP3]] to i8* -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META115:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META117:![0-9]+]]) -// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META119:![0-9]+]]) -// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !121 -// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !121 -// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !121 -// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !121 -// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !121 -// CHECK4-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias 
!121 -// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !121 +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META113:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META116:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META118:![0-9]+]]) +// CHECK4-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META120:![0-9]+]]) +// CHECK4-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !122 +// CHECK4-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !122 +// CHECK4-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !122 +// CHECK4-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !122 +// CHECK4-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !122 +// CHECK4-NEXT: store %struct.anon.21* [[TMP8]], %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 +// CHECK4-NEXT: [[TMP10:%.*]] = load %struct.anon.21*, %struct.anon.21** [[__CONTEXT_ADDR_I]], align 8, !noalias !122 // CHECK4-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_21:%.*]], %struct.anon.21* [[TMP10]], i32 0, i32 0 // CHECK4-NEXT: [[TMP12:%.*]] = load %struct.S1*, %struct.S1** [[TMP11]], align 8 // CHECK4-NEXT: [[A_I:%.*]] = getelementptr inbounds [[STRUCT_S1:%.*]], %struct.S1* [[TMP12]], i32 0, i32 0 @@ -4692,4 +4692,3 @@ void xxxx() { // CHECK4-NEXT: call void @__cxx_global_var_init() // CHECK4-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/task_if_codegen.cpp b/clang/test/OpenMP/task_if_codegen.cpp index 4bdcf06ecbfd9..41a0dab8affec 100644 --- a/clang/test/OpenMP/task_if_codegen.cpp +++ b/clang/test/OpenMP/task_if_codegen.cpp @@ -134,17 +134,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: 
[[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP9]], i8** 
[[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: call void @_Z9gtid_testv() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -238,17 +238,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -274,17 +274,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !33 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !33 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !33 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !33 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !33 -// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !33 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !34 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK1-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -310,17 +310,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* 
[[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !43 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !43 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !43 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !43 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !43 -// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !44 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK1-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -346,17 +346,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast 
%struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !53 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !53 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !53 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !53 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !53 -// CHECK1-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !54 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: store %struct.anon.6* 
[[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK1-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -509,17 +509,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK1-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK1-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -545,17 +545,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META64:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !73 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !73 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !73 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !73 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !73 -// CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 -// CHECK1-NEXT: [[TMP10:%.*]] = load 
%struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !74 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK1-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -581,17 +581,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META74:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META81:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !83 -// CHECK1-NEXT: store i32* [[TMP5]], i32** 
[[DOTPART_ID__ADDR_I]], align 8, !noalias !83 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !83 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !83 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !83 -// CHECK1-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !84 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK1-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -617,17 +617,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META84:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !93 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !93 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !93 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !93 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !93 -// CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !94 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 +// CHECK1-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// 
CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK1-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -653,17 +653,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.16* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.17* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META101:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !103 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !103 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !103 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !103 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !103 -// CHECK1-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias 
!104 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK1-NEXT: call void @_Z3fn5v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -689,17 +689,17 @@ int main() { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.18* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META104:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !113 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !113 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !113 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !113 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !113 -// CHECK1-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** 
[[__CONTEXT_ADDR_I]], align 8, !noalias !113 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !114 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK1-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK1-NEXT: ret i32 0 // @@ -751,17 +751,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, 
!noalias !11 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: call void @_Z9gtid_testv() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -855,17 +855,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, 
%struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK2-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -891,17 +891,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !33 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !33 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !33 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !33 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !33 -// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !34 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], 
align 8, !noalias !34 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !34 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 +// CHECK2-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK2-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -927,17 +927,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !43 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !43 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !43 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !43 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !43 -// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 +// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !44 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !44 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !44 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !44 +// CHECK2-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK2-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -963,17 +963,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !53 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !53 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], 
align 8, !noalias !53 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !53 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !53 -// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !54 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !54 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !54 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 +// CHECK2-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK2-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1126,17 +1126,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, 
!noalias !64 // CHECK2-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1162,17 +1162,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META64:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !73 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !73 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !73 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !73 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !73 -// CHECK2-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !74 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !74 +// CHECK2-NEXT: 
store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !74 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 +// CHECK2-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK2-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1198,17 +1198,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META74:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META81:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !83 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !83 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !83 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !83 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !83 -// CHECK2-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) 
+// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !84 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !84 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !84 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !84 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !84 +// CHECK2-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK2-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1234,17 +1234,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META84:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !93 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !93 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !93 -// CHECK2-NEXT: store void (i8*, ...)* 
null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !93 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !93 -// CHECK2-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !94 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !94 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !94 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 +// CHECK2-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK2-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1270,17 +1270,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.16* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.17* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// 
CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META101:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !103 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !103 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !103 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !103 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !103 -// CHECK2-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !104 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !104 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !104 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 +// CHECK2-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK2-NEXT: call void @_Z3fn5v() 
#[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1306,17 +1306,17 @@ int main() { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.18* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META104:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !113 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !113 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !113 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !113 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !113 -// CHECK2-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !114 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !114 +// CHECK2-NEXT: store i8* null, i8** 
[[DOTPRIVATES__ADDR_I]], align 8, !noalias !114 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 +// CHECK2-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK2-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK2-NEXT: ret i32 0 // @@ -1368,17 +1368,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK5-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK5-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK5-NEXT: call void @_Z9gtid_testv() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1472,17 +1472,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** 
[[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK5-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK5-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1508,17 +1508,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK5-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !33 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !33 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !33 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !33 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !33 -// CHECK5-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !34 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !34 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !34 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 +// CHECK5-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK5-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ 
-1544,17 +1544,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !43 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !43 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !43 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !43 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !43 -// CHECK5-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !44 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !44 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !44 +// CHECK5-NEXT: store void 
(i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !44 +// CHECK5-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK5-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1580,17 +1580,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !53 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !53 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !53 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !53 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !53 -// CHECK5-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// 
CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !54 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !54 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !54 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 +// CHECK5-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK5-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1743,17 +1743,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK5-NEXT: store i8* 
[[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK5-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK5-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK5-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1779,17 +1779,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META64:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK5-NEXT: call 
void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !73 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !73 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !73 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !73 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !73 -// CHECK5-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !74 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !74 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !74 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 +// CHECK5-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK5-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1815,17 +1815,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** 
[[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META74:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META81:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !83 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !83 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !83 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !83 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !83 -// CHECK5-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !84 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !84 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !84 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias 
!84 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !84 +// CHECK5-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK5-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1851,17 +1851,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META84:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !93 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !93 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !93 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !93 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !93 -// CHECK5-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META90:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !94 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !94 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !94 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 +// CHECK5-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK5-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1887,17 +1887,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.16* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.17* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META101:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !103 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !103 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !103 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !103 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !103 -// 
CHECK5-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !104 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !104 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !104 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 +// CHECK5-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK5-NEXT: call void @_Z3fn5v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1923,17 +1923,17 @@ int main() { // CHECK5-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK5-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.18* // CHECK5-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META104:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK5-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) -// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !113 -// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !113 -// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !113 -// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !113 -// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !113 -// CHECK5-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 -// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK5-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK5-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !114 +// CHECK5-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !114 +// CHECK5-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !114 +// CHECK5-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 +// CHECK5-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 +// CHECK5-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK5-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK5-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK5-NEXT: ret i32 0 // @@ -1985,17 +1985,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load 
i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// 
CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK6-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK6-NEXT: call void @_Z9gtid_testv() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2089,17 +2089,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.0* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.1* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// 
CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK6-NEXT: store %struct.anon.0* [[TMP8]], %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.0*, %struct.anon.0** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK6-NEXT: call void @_Z3fn7v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2125,17 +2125,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.2* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.3* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META24:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META27:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META29:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META31:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !33 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !33 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !33 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !33 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !33 -// CHECK6-NEXT: store %struct.anon.2* 
[[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !33 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META25:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META28:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META30:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META32:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !34 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !34 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !34 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !34 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !34 +// CHECK6-NEXT: store %struct.anon.2* [[TMP8]], %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.2*, %struct.anon.2** [[__CONTEXT_ADDR_I]], align 8, !noalias !34 // CHECK6-NEXT: call void @_Z3fn8v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2161,17 +2161,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.4* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.5* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META34:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META37:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META39:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META41:![0-9]+]]) -// CHECK6-NEXT: store i32 
[[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !43 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !43 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !43 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !43 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !43 -// CHECK6-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !43 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META35:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META38:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META40:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META42:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !44 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !44 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !44 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !44 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !44 +// CHECK6-NEXT: store %struct.anon.4* [[TMP8]], %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.4*, %struct.anon.4** [[__CONTEXT_ADDR_I]], align 8, !noalias !44 // CHECK6-NEXT: call void @_Z3fn9v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2197,17 +2197,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.6* // CHECK6-NEXT: 
[[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.7* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META44:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META47:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META49:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META51:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !53 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !53 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !53 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !53 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !53 -// CHECK6-NEXT: store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !53 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META45:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META48:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META50:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META52:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !54 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !54 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !54 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !54 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !54 +// CHECK6-NEXT: 
store %struct.anon.6* [[TMP8]], %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.6*, %struct.anon.6** [[__CONTEXT_ADDR_I]], align 8, !noalias !54 // CHECK6-NEXT: call void @_Z4fn10v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2360,17 +2360,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.8* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.9* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META54:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META57:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META59:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META61:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !63 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !63 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !63 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !63 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !63 -// CHECK6-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !63 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META55:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META58:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META60:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META62:![0-9]+]]) +// 
CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !64 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !64 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !64 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !64 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !64 +// CHECK6-NEXT: store %struct.anon.8* [[TMP8]], %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.8*, %struct.anon.8** [[__CONTEXT_ADDR_I]], align 8, !noalias !64 // CHECK6-NEXT: call void @_Z3fn1v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2396,17 +2396,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.10* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.11* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META64:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META67:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META69:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META71:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !73 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !73 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !73 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !73 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !73 -// CHECK6-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 -// CHECK6-NEXT: 
[[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !73 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META65:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META68:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META70:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META72:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !74 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !74 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !74 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !74 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !74 +// CHECK6-NEXT: store %struct.anon.10* [[TMP8]], %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.10*, %struct.anon.10** [[__CONTEXT_ADDR_I]], align 8, !noalias !74 // CHECK6-NEXT: call void @_Z3fn2v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2432,17 +2432,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.12* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.13* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META74:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META77:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META79:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META81:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !83 -// CHECK6-NEXT: 
store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !83 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !83 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !83 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !83 -// CHECK6-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !83 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META75:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META78:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META80:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META82:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !84 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !84 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !84 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !84 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !84 +// CHECK6-NEXT: store %struct.anon.12* [[TMP8]], %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.12*, %struct.anon.12** [[__CONTEXT_ADDR_I]], align 8, !noalias !84 // CHECK6-NEXT: call void @_Z3fn3v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2468,17 +2468,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.14* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.15* [[TMP3]] to i8* -// 
CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META84:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META87:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META89:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META91:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !93 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !93 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !93 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !93 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !93 -// CHECK6-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !93 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META85:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META88:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META90:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META92:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !94 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !94 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !94 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !94 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !94 +// CHECK6-NEXT: store %struct.anon.14* [[TMP8]], %struct.anon.14** [[__CONTEXT_ADDR_I]], align 
8, !noalias !94 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.14*, %struct.anon.14** [[__CONTEXT_ADDR_I]], align 8, !noalias !94 // CHECK6-NEXT: call void @_Z3fn4v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2504,17 +2504,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.16* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.17* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META94:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META97:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META99:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META101:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !103 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !103 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !103 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !103 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !103 -// CHECK6-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !103 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META95:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META98:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META100:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META102:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], 
align 4, !noalias !104 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !104 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !104 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !104 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !104 +// CHECK6-NEXT: store %struct.anon.16* [[TMP8]], %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.16*, %struct.anon.16** [[__CONTEXT_ADDR_I]], align 8, !noalias !104 // CHECK6-NEXT: call void @_Z3fn5v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // @@ -2540,18 +2540,17 @@ int main() { // CHECK6-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK6-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.18* // CHECK6-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.19* [[TMP3]] to i8* -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META104:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META107:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META109:![0-9]+]]) -// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META111:![0-9]+]]) -// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !113 -// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !113 -// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !113 -// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !113 -// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !113 -// CHECK6-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 -// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, 
%struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !113 +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META105:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META108:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META110:![0-9]+]]) +// CHECK6-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META112:![0-9]+]]) +// CHECK6-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !114 +// CHECK6-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !114 +// CHECK6-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !114 +// CHECK6-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !114 +// CHECK6-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !114 +// CHECK6-NEXT: store %struct.anon.18* [[TMP8]], %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 +// CHECK6-NEXT: [[TMP10:%.*]] = load %struct.anon.18*, %struct.anon.18** [[__CONTEXT_ADDR_I]], align 8, !noalias !114 // CHECK6-NEXT: call void @_Z3fn6v() #[[ATTR3]] // CHECK6-NEXT: ret i32 0 // -// \ No newline at end of file diff --git a/clang/test/OpenMP/task_in_reduction_codegen.cpp b/clang/test/OpenMP/task_in_reduction_codegen.cpp index ca79752c74d24..9f8bf1ce15476 100644 --- a/clang/test/OpenMP/task_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/task_in_reduction_codegen.cpp @@ -588,29 +588,29 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: 
call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 -// CHECK1-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = bitcast void (i8*, ...)* [[TMP15]] to void (i8*, i8***, i8***)* // CHECK1-NEXT: call void [[TMP17]](i8* [[TMP16]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 // CHECK1-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP20]], align 8 // CHECK1-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK1-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: 
[[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP21]] to i8* // CHECK1-NEXT: [[TMP25:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP23]], i8* [[TMP22]], i8* [[TMP24]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP25]] to i32* @@ -657,20 +657,20 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK1-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK1-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK1-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK1-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK1-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP10]], i32 0, i32 0 // CHECK1-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK1-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP12]] to i8* // CHECK1-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP13]], i8* null, i8* [[TMP14]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP15]] to i32* @@ -1259,29 +1259,29 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP13:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP14:%.*]] = load i64, i64* [[TMP13]], align 8 -// CHECK2-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP15:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = bitcast void (i8*, ...)* [[TMP15]] to void (i8*, i8***, i8***)* // CHECK2-NEXT: call void [[TMP17]](i8* [[TMP16]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP18:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP19:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP12]], i32 0, i32 0 // CHECK2-NEXT: [[TMP21:%.*]] = load i32*, i32** [[TMP20]], align 8 // CHECK2-NEXT: [[TMP22:%.*]] = load i8*, i8** [[TMP18]], align 8 -// CHECK2-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: 
[[TMP23:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP24:%.*]] = bitcast i32* [[TMP21]] to i8* // CHECK2-NEXT: [[TMP25:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP23]], i8* [[TMP22]], i8* [[TMP24]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP25]] to i32* @@ -1328,20 +1328,20 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP7:%.*]] = load i8*, i8** [[TMP6]], align 8 // CHECK2-NEXT: [[TMP8:%.*]] = bitcast i8* [[TMP7]] to %struct.anon.1* // CHECK2-NEXT: [[TMP9:%.*]] = bitcast %struct.kmp_task_t_with_privates.2* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META14:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META17:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META19:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META21:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 -// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !23 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META15:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META18:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META20:![0-9]+]]) +// CHECK2-NEXT: call void 
@llvm.experimental.noalias.scope.decl(metadata [[META22:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* null, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store void (i8*, ...)* null, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store i8* [[TMP9]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: store %struct.anon.1* [[TMP8]], %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 +// CHECK2-NEXT: [[TMP10:%.*]] = load %struct.anon.1*, %struct.anon.1** [[__CONTEXT_ADDR_I]], align 8, !noalias !24 // CHECK2-NEXT: [[TMP11:%.*]] = getelementptr inbounds [[STRUCT_ANON_1:%.*]], %struct.anon.1* [[TMP10]], i32 0, i32 0 // CHECK2-NEXT: [[TMP12:%.*]] = load i32*, i32** [[TMP11]], align 8 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !23 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !24 // CHECK2-NEXT: [[TMP14:%.*]] = bitcast i32* [[TMP12]] to i8* // CHECK2-NEXT: [[TMP15:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP13]], i8* null, i8* [[TMP14]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP15]] to i32* @@ -1394,4 +1394,3 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S*, %struct.S** [[THIS_ADDR]], align 8 // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp index a6e679f43fe25..12804d1b7959a 100644 --- a/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_in_reduction_codegen.cpp @@ -594,35 +594,35 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // 
CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP29:%.*]] = 
load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -637,19 +637,19 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: 
[[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] @@ -658,9 +658,9 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] // CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK1: .omp_outlined..9.exit: // CHECK1-NEXT: ret i32 0 @@ -1264,35 +1264,35 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata 
[[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: 
store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK2-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** 
[[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK2-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -1307,19 +1307,19 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* 
[[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] @@ -1328,9 +1328,9 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] // CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]] // CHECK2: .omp_outlined..9.exit: // CHECK2-NEXT: ret i32 0 @@ -1379,4 +1379,3 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[THIS1:%.*]] = load %struct.S*, 
%struct.S** [[THIS_ADDR]], align 8 // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp index 7e518bdd09f28..71799c9842b0d 100644 --- a/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp +++ b/clang/test/OpenMP/taskloop_simd_in_reduction_codegen.cpp @@ -594,35 +594,35 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK1-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK1-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK1-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK1-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK1-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK1-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK1-NEXT: [[TMP29:%.*]] = 
load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK1-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK1-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK1-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK1-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK1-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK1-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -637,31 +637,31 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK1-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK1-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK1-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK1-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK1-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK1: omp.inner.for.cond.i: -// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK1-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP47:%.*]] = 
load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK1-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK1: omp.inner.for.body.i: -// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK1-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] -// CHECK1-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !15 // CHECK1-NEXT: [[CONV5_I:%.*]] = sext i16 [[TMP50]] to i32 -// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK1-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] -// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !14 -// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK1-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK1-NEXT: [[ADD7_I:%.*]] = add nsw i32 
[[TMP52]], 1 -// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK1: .omp_outlined..9.exit: // CHECK1-NEXT: ret i32 0 // @@ -1264,35 +1264,35 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP19:%.*]] = load i32, i32* [[TMP18]], align 8 // CHECK2-NEXT: [[TMP20:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T]], %struct.kmp_task_t* [[TMP4]], i32 0, i32 9 // CHECK2-NEXT: [[TMP21:%.*]] = load i8*, i8** [[TMP20]], align 8 -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META11:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !13 -// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META12:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP13]], i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP15]], i64* [[DOTUB__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i64 [[TMP17]], i64* [[DOTST__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store i32 [[TMP19]], i32* [[DOTLITER__ADDR_I]], align 4, !noalias !14 +// CHECK2-NEXT: store i8* [[TMP21]], i8** [[DOTREDUCTIONS__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP22:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP23:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP22]], i32 0, i32 1 // CHECK2-NEXT: [[TMP24:%.*]] = load i64, i64* [[TMP23]], align 8 -// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP25:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP26:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP27:%.*]] = bitcast void (i8*, ...)* [[TMP25]] to void (i8*, i8***, i8***)* // CHECK2-NEXT: call void [[TMP27]](i8* [[TMP26]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]]) #[[ATTR3]] -// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !13 -// CHECK2-NEXT: [[TMP29:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP28:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !14 +// CHECK2-NEXT: [[TMP29:%.*]] = 
load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR1_I]], align 8, !noalias !14 // CHECK2-NEXT: [[TMP30:%.*]] = getelementptr inbounds [[STRUCT_ANON]], %struct.anon* [[TMP22]], i32 0, i32 0 // CHECK2-NEXT: [[TMP31:%.*]] = load i32*, i32** [[TMP30]], align 8 // CHECK2-NEXT: [[TMP32:%.*]] = load i8*, i8** [[TMP28]], align 8 -// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !13 +// CHECK2-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !14 // CHECK2-NEXT: [[TMP34:%.*]] = bitcast i32* [[TMP31]] to i8* // CHECK2-NEXT: [[TMP35:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP32]], i8* [[TMP34]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP35]] to i32* @@ -1307,31 +1307,31 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP43:%.*]] = bitcast i16* [[TMP37]] to i8* // CHECK2-NEXT: [[TMP44:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP33]], i8* [[TMP42]], i8* [[TMP43]]) #[[ATTR3]] // CHECK2-NEXT: [[CONV2_I:%.*]] = bitcast i8* [[TMP44]] to i16* -// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !13 +// CHECK2-NEXT: [[TMP45:%.*]] = load i64, i64* [[DOTLB__ADDR_I]], align 8, !noalias !14 // CHECK2-NEXT: [[CONV3_I:%.*]] = trunc i64 [[TMP45]] to i32 -// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13 +// CHECK2-NEXT: store i32 [[CONV3_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I:%.*]] // CHECK2: omp.inner.for.cond.i: -// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP46:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CONV4_I:%.*]] = sext i32 [[TMP46]] to i64 -// CHECK2-NEXT: [[TMP47:%.*]] = load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP47:%.*]] 
= load i64, i64* [[DOTUB__ADDR_I]], align 8, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[CMP_I:%.*]] = icmp ule i64 [[CONV4_I]], [[TMP47]] // CHECK2-NEXT: br i1 [[CMP_I]], label [[OMP_INNER_FOR_BODY_I:%.*]], label [[DOTOMP_OUTLINED__9_EXIT:%.*]] // CHECK2: omp.inner.for.body.i: -// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP48:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: store i32 [[TMP48]], i32* [[I_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP49:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM_I:%.*]] = sext i32 [[TMP49]] to i64 // CHECK2-NEXT: [[ARRAYIDX_I:%.*]] = getelementptr inbounds i16, i16* [[CONV2_I]], i64 [[IDXPROM_I]] -// CHECK2-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP50:%.*]] = load i16, i16* [[ARRAYIDX_I]], align 2, !llvm.access.group !15 // CHECK2-NEXT: [[CONV5_I:%.*]] = sext i16 [[TMP50]] to i32 -// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !14 +// CHECK2-NEXT: [[TMP51:%.*]] = load i32, i32* [[CONV_I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD6_I:%.*]] = add nsw i32 [[TMP51]], [[CONV5_I]] -// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !14 -// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 +// CHECK2-NEXT: store i32 [[ADD6_I]], i32* [[CONV_I]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP52:%.*]] = load i32, i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 // CHECK2-NEXT: [[ADD7_I:%.*]] = add nsw 
i32 [[TMP52]], 1 -// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !13, !llvm.access.group !14 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7_I]], i32* [[DOTOMP_IV_I]], align 4, !noalias !14, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND_I]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: .omp_outlined..9.exit: // CHECK2-NEXT: ret i32 0 // diff --git a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp index d7ef3bde15e95..a16577e47b566 100644 --- a/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_dist_schedule_codegen.cpp @@ -432,28 +432,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -779,28 +779,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: 
[[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: 
omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1124,27 +1124,27 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: 
-// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -1468,27 +1468,27 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: 
[[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -2053,27 +2053,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -2380,27 +2380,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: 
omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 
[[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -2965,27 +2965,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store 
i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3292,27 +3292,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3869,26 +3869,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP13:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4193,26 +4193,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4769,26 +4769,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !12 +// 
CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5093,26 +5093,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5136,4 +5136,3 @@ int main (int argc, char **argv) { // CHECK12-NEXT: call void @__tgt_register_requires(i64 1) // CHECK12-NEXT: ret void // -// \ No newline at end of file diff --git 
a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp index 303b0bb201326..4b239f9447a7b 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_reduction_task_codegen.cpp @@ -817,26 +817,26 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK1-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK1-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK1-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK1-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK1-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK1-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK1-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK1-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK1-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK1-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK1-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK1-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK1-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK1-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK1-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -870,8 +870,8 @@ int main(int argc, char **argv) { // CHECK1-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK1-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK1-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK1-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK1-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK1-NEXT: ret i32 0 // // @@ -1754,26 +1754,26 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP9:%.*]] = getelementptr inbounds [[STRUCT_KMP_TASK_T_WITH_PRIVATES]], %struct.kmp_task_t_with_privates* [[TMP3]], i32 0, i32 1 // CHECK2-NEXT: [[TMP10:%.*]] = bitcast %struct..kmp_privates.t* [[TMP9]] to i8* // CHECK2-NEXT: [[TMP11:%.*]] = bitcast %struct.kmp_task_t_with_privates* [[TMP3]] to i8* -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META2:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META5:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META7:![0-9]+]]) -// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META9:![0-9]+]]) -// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 -// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !11 -// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META3:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META6:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META8:![0-9]+]]) +// CHECK2-NEXT: call void @llvm.experimental.noalias.scope.decl(metadata [[META10:![0-9]+]]) +// CHECK2-NEXT: store i32 [[TMP2]], i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 +// CHECK2-NEXT: store i32* [[TMP5]], i32** [[DOTPART_ID__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP10]], i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store void (i8*, ...)* bitcast (void (%struct..kmp_privates.t*, i8***)* @.omp_task_privates_map. 
to void (i8*, ...)*), void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP11]], i8** [[DOTTASK_T__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: store %struct.anon* [[TMP8]], %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load %struct.anon*, %struct.anon** [[__CONTEXT_ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load void (i8*, ...)*, void (i8*, ...)** [[DOTCOPY_FN__ADDR_I]], align 8, !noalias !12 +// CHECK2-NEXT: [[TMP14:%.*]] = load i8*, i8** [[DOTPRIVATES__ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP15:%.*]] = bitcast void (i8*, ...)* [[TMP13]] to void (i8*, i8***)* // CHECK2-NEXT: call void [[TMP15]](i8* [[TMP14]], i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]]) #[[ATTR5]] -// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !11 +// CHECK2-NEXT: [[TMP16:%.*]] = load i8**, i8*** [[DOTFIRSTPRIV_PTR_ADDR_I]], align 8, !noalias !12 // CHECK2-NEXT: [[TMP17:%.*]] = getelementptr inbounds [[STRUCT_ANON:%.*]], %struct.anon* [[TMP12]], i32 0, i32 1 // CHECK2-NEXT: [[TMP18:%.*]] = load i32*, i32** [[TMP17]], align 8 // CHECK2-NEXT: [[TMP19:%.*]] = load i8*, i8** [[TMP16]], align 8 -// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !11 +// CHECK2-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTGLOBAL_TID__ADDR_I]], align 4, !noalias !12 // CHECK2-NEXT: [[TMP21:%.*]] = bitcast i32* [[TMP18]] to i8* // CHECK2-NEXT: [[TMP22:%.*]] = call i8* @__kmpc_task_reduction_get_th_data(i32 [[TMP20]], i8* [[TMP19]], i8* [[TMP21]]) #[[ATTR5]] // CHECK2-NEXT: [[CONV_I:%.*]] = bitcast i8* [[TMP22]] to i32* @@ -1807,8 +1807,8 @@ int main(int argc, char **argv) { // CHECK2-NEXT: [[TMP46:%.*]] = sub i64 [[TMP44]], [[TMP45]] // CHECK2-NEXT: [[TMP47:%.*]] = sdiv exact i64 [[TMP46]], ptrtoint (i8* getelementptr (i8, i8* null, i32 1) to i64) // CHECK2-NEXT: [[TMP48:%.*]] = getelementptr i8, i8* [[TMP40]], i64 
[[TMP47]] -// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !11 -// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !11 +// CHECK2-NEXT: store i8** [[TMP4_I]], i8*** [[TMP_I]], align 8, !noalias !12 +// CHECK2-NEXT: store i8* [[TMP48]], i8** [[TMP4_I]], align 8, !noalias !12 // CHECK2-NEXT: ret i32 0 // // @@ -1909,4 +1909,3 @@ int main(int argc, char **argv) { // CHECK2: omp.arraycpy.done5: // CHECK2-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp index 830fc0dd91b08..a48230f44f502 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_schedule_codegen.cpp @@ -937,28 +937,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// 
CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1086,28 +1086,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !13 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 
[[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1852,28 +1852,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, 
!llvm.access.group !9 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2001,28 +2001,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2745,27 +2745,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], 
!llvm.loop [[LOOP11:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2889,27 +2889,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, 
!llvm.access.group !13 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK3-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3632,27 +3632,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3776,27 +3776,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 
+// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK4-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 
[[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4541,28 +4541,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, 
!llvm.access.group !9 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4690,28 +4690,28 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: 
[[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -5456,28 +5456,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 -// CHECK6-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !9 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5605,28 +5605,28 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = 
getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6349,27 +6349,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP11:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6493,27 +6493,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !13 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -7236,27 +7236,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, 
i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7380,27 +7380,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // 
CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -8656,27 +8656,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -8887,27 +8887,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: store 
i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -9679,27 +9679,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw 
i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -9850,27 +9850,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP10:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !24 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11126,27 +11126,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK14-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -11357,27 +11357,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, 
!llvm.access.group !17 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12149,27 +12149,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP22:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12320,27 +12320,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // 
CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13565,26 +13565,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: [[TMP20:%.*]] = load 
i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK15-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -13785,26 +13785,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], 
[[TMP19]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -14547,26 +14547,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 
[[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK15-NEXT: 
[[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -14708,26 +14708,26 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 
x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -15952,26 +15952,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK16-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP19:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK16-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -16172,26 +16172,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !19 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -16934,26 +16934,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: 
omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -17095,26 +17095,26 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: 
[[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18370,27 +18370,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK17-NEXT: 
[[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: 
omp.dispatch.inc: @@ -18601,27 +18601,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -19393,27 +19393,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// 
CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -19564,27 +19564,27 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -20840,27 +20840,27 @@ int main (int argc, char 
**argv) { // CHECK18-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP21:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -21071,27 +21071,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK18-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, 
!llvm.access.group !18 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -21863,27 +21863,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: 
-// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -22034,27 +22034,27 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK18-NEXT: 
[[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23279,26 +23279,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* 
[[TMP2]], i32 [[TMP20]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -23499,26 +23499,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -24261,26 +24261,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], 
align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -24422,26 +24422,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: 
omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -25666,26 +25666,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK20-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: 
[[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -25886,26 +25886,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[CMP5:%.*]] = 
icmp sle i32 [[TMP18]], [[TMP19]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -26648,26 +26648,26 @@ int main (int argc, char **argv) { // 
CHECK20-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 
// CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -26809,26 +26809,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds 
[10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -26843,4 +26843,3 @@ int main (int argc, char **argv) { // CHECK20-NEXT: call void @__tgt_register_requires(i64 1) // CHECK20-NEXT: ret void // -// \ No newline at end of file diff --git a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp index 2f77956b99000..00ec282f9322c 100644 --- a/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_parallel_for_simd_schedule_codegen.cpp @@ -402,23 +402,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: 
[[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -482,28 +482,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -567,23 +567,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -647,28 +647,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -732,23 +732,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], 
align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -824,28 +824,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -919,23 +919,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -996,28 +996,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1082,23 +1082,23 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK1-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -1159,28 +1159,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK1-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK1-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -1387,23 +1387,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1467,28 +1467,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1552,23 +1552,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1632,28 +1632,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1717,23 +1717,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1809,28 +1809,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1904,23 +1904,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1981,28 +1981,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2067,23 +2067,23 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK2-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -2144,28 +2144,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK2-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK2-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -2372,23 +2372,23 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2452,28 +2452,28 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2537,23 +2537,23 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2617,28 +2617,28 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2702,23 +2702,23 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2794,28 +2794,28 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -2889,23 +2889,23 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -2966,28 +2966,28 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3052,23 +3052,23 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK3-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -3129,28 +3129,28 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK3-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK3-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -3357,23 +3357,23 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !8 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !8 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !8 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3437,28 +3437,28 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3522,23 +3522,23 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !17 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !17 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !17 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !17 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3602,28 +3602,28 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !20 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !20 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3687,23 +3687,23 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* 
[[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !23 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3779,28 +3779,28 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !26 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP15]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -3874,23 +3874,23 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// 
CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !29 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -3951,28 +3951,28 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !17 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4037,23 +4037,23 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]) +// CHECK4-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], %struct.SS* [[TMP0]]), !llvm.access.group !35 // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -4114,28 +4114,28 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK4-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !21 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK4-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -4342,21 +4342,21 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br 
label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4418,27 +4418,27 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4502,21 +4502,21 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4578,27 +4578,27 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK5-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4662,21 +4662,21 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4747,27 +4747,27 @@ int main (int argc, char **argv) { // CHECK5: omp.dispatch.body: // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK5-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK5-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK5-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK5-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4841,21 +4841,21 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -4914,27 +4914,27 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -4999,21 +4999,21 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: 
-// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK5-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK5-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK5-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK5: omp.loop.exit: @@ -5072,27 +5072,27 @@ int main (int argc, char **argv) { // CHECK5-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK5-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK5: omp.inner.for.cond: -// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK5-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK5-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK5: omp.inner.for.body: -// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK5-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK5-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK5-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK5-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK5-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK5-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK5: omp.body.continue: // CHECK5-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK5: omp.inner.for.inc: -// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK5-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK5-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK5-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK5-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK5: omp.inner.for.end: // CHECK5-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK5: omp.dispatch.inc: @@ -5299,21 +5299,21 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: 
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5375,27 +5375,27 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5459,21 +5459,21 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5535,27 +5535,27 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK6-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5619,21 +5619,21 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5704,27 +5704,27 @@ int main (int argc, char **argv) { // CHECK6: omp.dispatch.body: // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK6-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK6-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK6-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK6-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5798,21 +5798,21 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -5871,27 +5871,27 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -5956,21 +5956,21 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: 
-// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK6-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK6-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK6-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK6: omp.loop.exit: @@ -6029,27 +6029,27 @@ int main (int argc, char **argv) { // CHECK6-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK6-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK6: omp.inner.for.cond: -// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK6-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK6-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK6: omp.inner.for.body: -// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK6-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK6-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK6-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK6-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK6-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK6-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK6: omp.body.continue: // CHECK6-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK6: omp.inner.for.inc: -// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK6-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK6-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK6-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK6-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK6: omp.inner.for.end: // CHECK6-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK6: omp.dispatch.inc: @@ -6256,21 +6256,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: 
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6332,27 +6332,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6416,21 +6416,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6492,27 +6492,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK7-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6576,21 +6576,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6661,27 +6661,27 @@ int main (int argc, char **argv) { // CHECK7: omp.dispatch.body: // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK7-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK7-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK7-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK7-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6755,21 +6755,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6828,27 +6828,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -6913,21 +6913,21 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: 
-// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK7-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK7-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK7-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK7: omp.loop.exit: @@ -6986,27 +6986,27 @@ int main (int argc, char **argv) { // CHECK7-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK7-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK7: omp.inner.for.cond: -// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK7-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK7: omp.inner.for.body: -// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK7-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK7-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK7-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK7: omp.body.continue: // CHECK7-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK7: omp.inner.for.inc: -// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK7-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK7-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK7-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK7-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK7: omp.inner.for.end: // CHECK7-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK7: omp.dispatch.inc: @@ -7213,21 +7213,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: 
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !9 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !9 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7289,27 +7289,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7373,21 +7373,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !18 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !18 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7449,27 +7449,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK8-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7533,21 +7533,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..7 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !24 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !24 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7618,27 +7618,27 @@ int main (int argc, char **argv) { // CHECK8: omp.dispatch.body: // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP12]], [[TMP13]] // CHECK8-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !27 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP14]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4 +// CHECK8-NEXT: [[TMP15:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP15]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK8-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK8-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP16]], 1 -// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7712,21 +7712,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..11 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !30 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !30 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !30 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7785,27 +7785,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !33 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !33 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -7870,21 +7870,21 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: 
-// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK8-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK8-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]) +// CHECK8-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, %struct.SS*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], %struct.SS* [[TMP0]]), !llvm.access.group !36 // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !36 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK8: omp.loop.exit: @@ -7943,27 +7943,27 @@ int main (int argc, char **argv) { // CHECK8-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK8-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK8: omp.inner.for.cond: -// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK8-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK8-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK8: omp.inner.for.body: -// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !22 +// CHECK8-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK8-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP12]] -// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !39 // CHECK8-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK8: omp.body.continue: // CHECK8-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK8: omp.inner.for.inc: -// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK8-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 // CHECK8-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK8-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !39 +// CHECK8-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] // CHECK8: omp.inner.for.end: // CHECK8-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK8: omp.dispatch.inc: @@ -9215,23 +9215,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: 
omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9328,27 +9328,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9451,23 +9451,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9564,27 +9564,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK13-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9703,51 +9703,51 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: 
[[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK13-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK13-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK13-NEXT: store i32 [[ADD10]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK13-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK13-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK13: cond.true12: -// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK13-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[COND_END14:%.*]] // CHECK13: cond.false13: -// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK13-NEXT: br label [[COND_END14]] // CHECK13: cond.end14: // CHECK13-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK13-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK13-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK13-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9847,27 +9847,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK13-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK13-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -9970,23 +9970,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10080,27 +10080,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK13-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK13-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK13-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -10217,27 +10217,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK13-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK13-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK13-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10335,27 +10335,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !26 -// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK13-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !26 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -10574,23 +10574,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10654,27 +10654,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10738,23 +10738,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10818,27 +10818,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK13-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -10917,27 +10917,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11017,27 +11017,27 @@ int main (int argc, char **argv) { // CHECK13: omp.dispatch.body: // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK13-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK13-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK13-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11111,23 +11111,23 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11188,27 +11188,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK13-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11288,27 +11288,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: 
omp.inner.for.cond: -// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK13-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK13-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK13-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK13-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK13-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK13: omp.loop.exit: @@ -11373,27 +11373,27 @@ int main (int argc, char **argv) { // CHECK13-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK13-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK13: omp.inner.for.cond: -// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK13-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK13: omp.inner.for.body: -// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK13-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK13-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK13-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK13: omp.body.continue: // CHECK13-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK13: omp.inner.for.inc: -// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK13-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK13-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK13-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK13-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK13: omp.inner.for.end: // CHECK13-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK13: omp.dispatch.inc: @@ -11875,23 +11875,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -11988,27 +11988,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12111,23 +12111,23 @@ int main (int 
argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12224,27 +12224,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK14-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12363,51 +12363,51 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// 
CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK14-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK14-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK14-NEXT: store i32 [[ADD10]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK14-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK14-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK14: cond.true12: -// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK14-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[COND_END14:%.*]] // CHECK14: cond.false13: -// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK14-NEXT: br label [[COND_END14]] // CHECK14: cond.end14: // CHECK14-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK14-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK14-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK14-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12507,27 +12507,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK14-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK14-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12630,23 +12630,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12740,27 +12740,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK14-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK14-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK14-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -12877,27 +12877,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK14-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK14-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK14-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -12995,27 +12995,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !26 -// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK14-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !26 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13234,23 +13234,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13314,27 +13314,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13398,23 +13398,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13478,27 +13478,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK14-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13577,27 +13577,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13677,27 +13677,27 @@ int main (int argc, char **argv) { // CHECK14: omp.dispatch.body: // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK14-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK14-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK14-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13771,23 +13771,23 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -13848,27 +13848,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK14-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -13948,27 +13948,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: 
omp.inner.for.cond: -// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK14-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK14-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK14-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK14-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK14-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK14: omp.loop.exit: @@ -14033,27 +14033,27 @@ int main (int argc, char **argv) { // CHECK14-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK14-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK14: omp.inner.for.cond: -// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK14-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK14: omp.inner.for.body: -// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK14-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK14-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK14-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK14: omp.body.continue: // CHECK14-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK14: omp.inner.for.inc: -// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK14-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK14-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK14-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK14-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK14: omp.inner.for.end: // CHECK14-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK14: omp.dispatch.inc: @@ -14535,23 +14535,23 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -14648,27 +14648,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK15-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -14771,23 +14771,23 @@ int main (int 
argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -14884,27 +14884,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK15-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15023,51 +15023,51 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// 
CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK15-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK15-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK15-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK15-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK15-NEXT: store i32 [[ADD10]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK15-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK15-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK15: cond.true12: -// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK15-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK15-NEXT: br label [[COND_END14:%.*]] // CHECK15: cond.false13: -// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK15-NEXT: br label [[COND_END14]] // CHECK15: cond.end14: // CHECK15-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK15-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK15-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK15-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15167,27 +15167,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK15-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK15-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK15-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15290,23 +15290,23 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15400,27 +15400,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !37 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK15-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK15-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK15-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK15-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -15537,27 +15537,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK15-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK15-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK15-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK15-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK15-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15655,27 +15655,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK15-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK15-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK15-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !26 -// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 +// CHECK15-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK15-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !26 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -15894,23 +15894,23 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -15974,27 +15974,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16058,23 +16058,23 @@ int main (int argc, char **argv) { // CHECK15-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16138,27 +16138,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK15-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK15-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16237,27 +16237,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16337,27 +16337,27 @@ int main (int argc, char **argv) { // CHECK15: omp.dispatch.body: // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK15-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK15-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK15-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -16431,23 +16431,23 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16508,27 +16508,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK15-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK15-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -16608,27 +16608,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: 
omp.inner.for.cond: -// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK15-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK15-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK15-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK15-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK15-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK15-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK15-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK15-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK15: omp.loop.exit: @@ -16693,27 +16693,27 @@ int main (int argc, char **argv) { // CHECK15-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK15-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK15: omp.inner.for.cond: -// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK15-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK15-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK15-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK15: omp.inner.for.body: -// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK15-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK15-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK15-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK15-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK15-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK15-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK15: omp.body.continue: // CHECK15-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK15: omp.inner.for.inc: -// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK15-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK15-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK15-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK15-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK15: omp.inner.for.end: // CHECK15-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK15: omp.dispatch.inc: @@ -17195,23 +17195,23 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !13 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !13 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17308,27 +17308,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK16-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !17 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !17 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !17 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !17 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 // CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !17 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17431,23 +17431,23 @@ int main (int 
argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !22 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !22 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17544,27 +17544,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK16-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17683,51 +17683,51 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// 
CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK16-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP18:%.*]] = zext i32 [[TMP17]] to i64 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[TMP20:%.*]] = zext i32 [[TMP19]] to i64 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !28 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK16-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4 -// CHECK16-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]) +// CHECK16-NEXT: store i32 [[TMP21]], i32* [[CONV7]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP22:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !28 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i64 [[TMP18]], i64 [[TMP20]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP22]]), !llvm.access.group !28 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[ADD10:%.*]] = add nsw i32 [[TMP27]], [[TMP28]] -// CHECK16-NEXT: store i32 [[ADD10]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: store i32 [[ADD10]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK16-NEXT: [[CMP11:%.*]] = icmp sgt i32 [[TMP29]], [[TMP30]] // CHECK16-NEXT: br i1 [[CMP11]], label [[COND_TRUE12:%.*]], label [[COND_FALSE13:%.*]] // CHECK16: cond.true12: -// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK16-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !28 // CHECK16-NEXT: br label [[COND_END14:%.*]] // CHECK16: cond.false13: -// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP32:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 // CHECK16-NEXT: br label [[COND_END14]] // CHECK16: cond.end14: // CHECK16-NEXT: [[COND15:%.*]] = phi i32 [ [[TMP31]], [[COND_TRUE12]] ], [ [[TMP32]], [[COND_FALSE13]] ] -// CHECK16-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK16-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK16-NEXT: store i32 [[COND15]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: [[TMP33:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: store i32 [[TMP33]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17827,27 +17827,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !31 // CHECK16-NEXT: [[CMP8:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK16-NEXT: br i1 [[CMP8]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I6]], align 4 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !31 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !31 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP19]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !31 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 // 
CHECK16-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !31 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -17950,23 +17950,23 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !34 // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !34 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !34 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !34 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !34 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18060,27 +18060,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !37 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 // CHECK16-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK16-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !37 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 // CHECK16-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK16-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18197,27 +18197,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK16-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[TMP17:%.*]] = zext i32 [[TMP16]] to i64 -// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[TMP19:%.*]] = zext i32 [[TMP18]] to i64 -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !40 // CHECK16-NEXT: [[CONV7:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK16-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4 -// CHECK16-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]) +// CHECK16-NEXT: store i32 [[TMP20]], i32* [[CONV7]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP21:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !40 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, i32*, i64, i32*, i64)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i64 [[TMP17]], i64 [[TMP19]], i32* [[TMP0]], i64 [[TMP1]], i32* [[TMP2]], i64 [[TMP21]]), !llvm.access.group !40 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !40 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP22]], [[TMP23]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18315,27 +18315,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: 
[[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK16-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !43 // CHECK16-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK16-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !26 -// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I6]], align 4, !llvm.access.group !43 +// CHECK16-NEXT: [[TMP21:%.*]] = load i32, i32* [[I6]], align 4, !llvm.access.group !43 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP21]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !43 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK16-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], 
align 4, !llvm.access.group !26 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !43 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP44:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -18554,23 +18554,23 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !46 // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !46 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !46 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !46 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !46 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP47:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18634,27 +18634,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, 
!llvm.access.group !49 // CHECK16-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK16-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !49 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !49 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !49 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 // CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !49 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP50:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18718,23 +18718,23 @@ int main (int argc, char **argv) { // CHECK16-NEXT: 
store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !52 // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !52 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !52 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !52 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !52 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP53:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18798,27 +18798,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !55 // CHECK16-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK16-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !55 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !55 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !55 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 // CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !55 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP56:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18897,27 +18897,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !58 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !58 +// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !58 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !58 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !58 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !58 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP59:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -18997,27 +18997,27 @@ int main (int argc, char **argv) { // CHECK16: omp.dispatch.body: // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK16-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP15:%.*]] 
= load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP16]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !61 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK16-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 // CHECK16-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !61 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP62:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19091,23 +19091,23 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !64 // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !64 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]) +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]]), !llvm.access.group !64 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !64 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !64 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP65:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -19168,27 +19168,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !36 +// CHECK16-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !67 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !36 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !36 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !67 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !67 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP12]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !36 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !67 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 // CHECK16-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !36 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP37:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !67 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP68:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19268,27 +19268,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: 
omp.inner.for.cond: -// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK16-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK16-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 +// CHECK16-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP9:%.*]] = zext i32 [[TMP8]] to i64 -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[TMP11:%.*]] = zext i32 [[TMP10]] to i64 -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[CONV]], align 8, !llvm.access.group !70 // CHECK16-NEXT: [[CONV2:%.*]] = bitcast i64* [[DOTCAPTURE_EXPR__CASTED]] to i32* -// CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4 -// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8 -// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]) +// CHECK16-NEXT: store i32 [[TMP12]], i32* [[CONV2]], align 4, !llvm.access.group !70 +// CHECK16-NEXT: [[TMP13:%.*]] = load i64, i64* [[DOTCAPTURE_EXPR__CASTED]], align 8, !llvm.access.group !70 +// CHECK16-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i64, i64, [10 x i32]*, i64)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i64 [[TMP9]], i64 [[TMP11]], [10 x i32]* [[TMP0]], i64 [[TMP13]]), !llvm.access.group !70 // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK16-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !70 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP14]], [[TMP15]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !70 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP71:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK16: omp.loop.exit: @@ -19353,27 +19353,27 @@ int main (int argc, char **argv) { // CHECK16-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK16-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK16: omp.inner.for.cond: -// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !40 -// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK16-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !73 // CHECK16-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK16-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK16: omp.inner.for.body: -// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK16-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK16-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !40 -// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !73 +// CHECK16-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !73 // CHECK16-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP13]] to i64 // CHECK16-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !73 // CHECK16-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK16: omp.body.continue: // CHECK16-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK16: omp.inner.for.inc: -// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 +// CHECK16-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 // CHECK16-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !40 -// 
CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP41:![0-9]+]] +// CHECK16-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !73 +// CHECK16-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP74:![0-9]+]] // CHECK16: omp.inner.for.end: // CHECK16-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK16: omp.dispatch.inc: @@ -19851,21 +19851,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -19960,26 +19960,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20081,21 +20081,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20190,26 +20190,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20324,48 +20324,48 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK17-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK17-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK17-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK17-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK17: cond.true11: -// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK17-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[COND_END13:%.*]] // CHECK17: cond.false12: -// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK17-NEXT: br label [[COND_END13]] // CHECK17: cond.end13: // CHECK17-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK17-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20462,26 +20462,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK17-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// 
CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20583,21 +20583,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20689,26 +20689,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK17-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK17-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// 
CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK17-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -20821,24 +20821,24 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK17-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK17-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -20933,26 +20933,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK17-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK17-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: 
omp.inner.for.body: -// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK17-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK17-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21169,21 +21169,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: 
omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21245,26 +21245,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21328,21 +21328,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21404,26 +21404,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21499,24 +21499,24 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21590,26 +21590,26 @@ int main (int argc, char **argv) { // CHECK17: omp.dispatch.body: // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK17-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK17-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK17-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21683,21 +21683,21 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21756,26 +21756,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -21852,24 +21852,24 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK17: omp.loop.exit: @@ -21931,26 +21931,26 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK17-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK17-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK17-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK17-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK17: omp.dispatch.inc: @@ -22428,21 +22428,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22537,26 +22537,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22658,21 +22658,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22767,26 +22767,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -22901,48 +22901,48 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK18-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK18-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK18-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK18-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK18-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK18-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK18: cond.true11: -// CHECK18-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK18-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK18-NEXT: br label [[COND_END13:%.*]] // CHECK18: cond.false12: -// CHECK18-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK18-NEXT: br label [[COND_END13]] // CHECK18: cond.end13: // CHECK18-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK18-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK18-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23039,26 +23039,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK18-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// 
CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23160,21 +23160,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23266,26 +23266,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK18-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK18-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// 
CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK18-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23398,24 +23398,24 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK18-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK18-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK18-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23510,26 +23510,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK18-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK18-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK18-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: 
omp.inner.for.body: -// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK18-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK18-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK18-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -23746,21 +23746,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: 
omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23822,26 +23822,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23905,21 +23905,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -23981,26 +23981,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24076,24 +24076,24 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24167,26 +24167,26 @@ int main (int argc, char **argv) { // CHECK18: omp.dispatch.body: // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK18-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK18-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK18-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK18-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -24260,21 +24260,21 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24333,26 +24333,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK18-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK18-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -24429,24 +24429,24 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -24508,26 +24508,26 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK18-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK18-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK18-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK18-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK18-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK18-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK18: omp.dispatch.inc: @@ -25005,21 +25005,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25114,26 +25114,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25235,21 +25235,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25344,26 +25344,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25478,48 +25478,48 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK19-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK19-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK19-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK19: cond.true11: -// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK19-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13:%.*]] // CHECK19: cond.false12: -// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK19-NEXT: br label [[COND_END13]] // CHECK19: cond.end13: // CHECK19-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK19-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25616,26 +25616,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK19-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// 
CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25737,21 +25737,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -25843,26 +25843,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK19-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// 
CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK19-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -25975,24 +25975,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK19-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK19-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26087,26 +26087,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK19-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: 
omp.inner.for.body: -// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK19-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK19-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -26323,21 +26323,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: 
omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26399,26 +26399,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26482,21 +26482,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26558,26 +26558,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26653,24 +26653,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26744,26 +26744,26 @@ int main (int argc, char **argv) { // CHECK19: omp.dispatch.body: // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK19-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK19-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -26837,21 +26837,21 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -26910,26 +26910,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -27006,24 +27006,24 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -27085,26 +27085,26 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK19-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK19-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK19-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK19: omp.dispatch.inc: @@ -27582,21 +27582,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..1 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !14 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !14 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -27691,26 +27691,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -27812,21 +27812,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !23 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..3 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !23 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !23 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -27921,26 +27921,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], 
align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !26 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP20]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !26 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP27:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28055,48 +28055,48 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP14]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !29 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP16]], 1 // CHECK20-NEXT: [[CMP6:%.*]] = icmp slt i32 [[TMP15]], [[ADD]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]) +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: store i32 [[TMP19]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..6 to void (i32*, i32*, ...)*), i32 [[TMP17]], i32 [[TMP18]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP20]]), !llvm.access.group !29 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP21]], [[TMP22]] -// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP23:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP23]], [[TMP24]] -// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP25:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP26:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[ADD9:%.*]] = add nsw i32 [[TMP25]], [[TMP26]] -// CHECK20-NEXT: store i32 [[ADD9]], i32* 
[[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: store i32 [[ADD9]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP27:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP28:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: [[CMP10:%.*]] = icmp sgt i32 [[TMP27]], [[TMP28]] // CHECK20-NEXT: br i1 [[CMP10]], label [[COND_TRUE11:%.*]], label [[COND_FALSE12:%.*]] // CHECK20: cond.true11: -// CHECK20-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4 +// CHECK20-NEXT: [[TMP29:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR_2]], align 4, !llvm.access.group !29 // CHECK20-NEXT: br label [[COND_END13:%.*]] // CHECK20: cond.false12: -// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP30:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 // CHECK20-NEXT: br label [[COND_END13]] // CHECK20: cond.end13: // CHECK20-NEXT: [[COND14:%.*]] = phi i32 [ [[TMP29]], [[COND_TRUE11]] ], [ [[TMP30]], [[COND_FALSE12]] ] -// CHECK20-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK20-NEXT: store i32 [[COND14]], i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: [[TMP31:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: store i32 [[TMP31]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !29 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label 
[[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28193,26 +28193,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP15]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP16]], [[TMP17]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP18]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP19]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !32 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 // CHECK20-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP20]], 1 -// 
CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !32 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28314,21 +28314,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]) +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 5, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*)* @.omp_outlined..9 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]]), !llvm.access.group !35 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !35 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP18]], [[TMP19]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !35 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28420,26 +28420,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP16]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[CMP4:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK20-NEXT: br i1 [[CMP4]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// 
CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !38 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 // CHECK20-NEXT: [[ADD5:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !23 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD5]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !38 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP39:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -28552,24 +28552,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 // CHECK20-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK20-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]) +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 [[TMP18]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 6, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, i32*, i32, i32*, i32)* @.omp_outlined..12 to void (i32*, i32*, ...)*), i32 [[TMP16]], i32 [[TMP17]], i32* [[TMP0]], i32 [[TMP1]], i32* [[TMP2]], i32 [[TMP19]]), !llvm.access.group !41 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !41 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP20]], [[TMP21]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28664,26 +28664,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP17]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP18]], [[TMP19]] // CHECK20-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: 
omp.inner.for.body: -// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP20:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP20]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: [[TMP21:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP21]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !44 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK20-NEXT: [[TMP22:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 // CHECK20-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP22]], 1 -// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !44 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP45:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -28900,21 +28900,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: 
omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..15 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !47 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !47 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP30:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !47 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP48:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -28976,26 +28976,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !50 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP31:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !50 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP51:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29059,21 +29059,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 
+// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..18 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !53 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !53 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP32:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !53 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP54:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29135,26 +29135,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP7]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !56 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP33:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !56 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP57:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29230,24 +29230,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 
+// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..22 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !59 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !59 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP34:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !59 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP60:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29321,26 +29321,26 @@ int main (int argc, char **argv) { // CHECK20: omp.dispatch.body: // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP13]], [[TMP14]] // CHECK20-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP15:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP15]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: [[TMP16:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP16]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !62 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 // CHECK20-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP17]], 1 -// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP35:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !62 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP63:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -29414,21 +29414,21 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// 
CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*)* @.omp_outlined..26 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]]), !llvm.access.group !65 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !65 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP10]], [[TMP11]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP36:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !65 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP66:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29487,26 +29487,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP8]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP9]], [[TMP10]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP11]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP12]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !68 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP13]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !37 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP38:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !68 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP69:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: @@ -29583,24 +29583,24 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4 -// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4 -// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4 -// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]) +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_COMB_LB]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_COMB_UB]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__ADDR]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: store i32 [[TMP10]], i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTCAPTURE_EXPR__CASTED]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) 
@__kmpc_fork_call(%struct.ident_t* @[[GLOB3]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32, i32, [10 x i32]*, i32)* @.omp_outlined..30 to void (i32*, i32*, ...)*), i32 [[TMP8]], i32 [[TMP9]], [10 x i32]* [[TMP0]], i32 [[TMP11]]), !llvm.access.group !71 // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[DOTOMP_STRIDE]], align 4, !llvm.access.group !71 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP12]], [[TMP13]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP40:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !71 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP72:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -29662,26 +29662,26 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP9]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[CMP:%.*]] = icmp sle i32 [[TMP10]], [[TMP11]] // CHECK20-NEXT: br i1 [[CMP]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: 
[[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP12]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: [[TMP13:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP13]] -// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !74 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 +// CHECK20-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 // CHECK20-NEXT: [[ADD1:%.*]] = add nsw i32 [[TMP14]], 1 -// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !41 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP42:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD1]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !74 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP75:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK20: omp.dispatch.inc: diff --git a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp index b26e492220061..7e95a42396ea5 100644 --- a/clang/test/OpenMP/teams_distribute_simd_codegen.cpp +++ 
b/clang/test/OpenMP/teams_distribute_simd_codegen.cpp @@ -452,27 +452,27 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK1-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // 
CHECK1-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -589,7 +589,7 @@ int main (int argc, char **argv) { // CHECK1-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK1-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -861,27 +861,27 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK2-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store 
i32 [[ADD]], i32* [[I4]], align 4 -// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK2-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -998,7 +998,7 @@ int main (int argc, char **argv) { // CHECK2-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK2-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -1261,26 +1261,26 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], 
align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK3-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 [[TMP17]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK3-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK3-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD7]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1395,7 +1395,7 @@ int main (int argc, char **argv) { // CHECK3-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK3-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1658,26 +1658,26 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK4-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[I4]], 
align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [100 x i32], [100 x i32]* [[TMP2]], i32 0, i32 [[TMP17]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK4-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1792,7 +1792,7 @@ int main (int argc, char **argv) { // CHECK4-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP17]], 1 // CHECK4-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -2522,27 +2522,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2736,27 
+2736,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// 
CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -2948,26 +2948,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -3159,26 +3159,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, 
!llvm.access.group !5 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -3637,31 +3637,31 @@ int main (int argc, char **argv) { // CHECK17-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK17-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK17: omp.inner.for.cond: -// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK17-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK17-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK17: omp.inner.for.body: -// CHECK17-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP8:%.*]] = load 
i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK17-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK17-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4 +// CHECK17-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 // CHECK17-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK17-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK17-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK17-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 +// CHECK17-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK17-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK17: omp.body.continue: // CHECK17-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK17: omp.inner.for.inc: -// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK17-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK17-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK17-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK17-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK17: omp.inner.for.end: // CHECK17-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // 
CHECK17: omp.loop.exit: @@ -3807,31 +3807,31 @@ int main (int argc, char **argv) { // CHECK18-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK18-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK18: omp.inner.for.cond: -// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK18-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK18-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK18-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK18: omp.inner.for.body: -// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK18-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK18-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4 +// CHECK18-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 // CHECK18-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK18-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP10]] to i64 // CHECK18-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK18-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 +// 
CHECK18-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK18-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK18: omp.body.continue: // CHECK18-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK18: omp.inner.for.inc: -// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK18-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK18-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP3:![0-9]+]] +// CHECK18-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK18-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] // CHECK18: omp.inner.for.end: // CHECK18-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK18: omp.loop.exit: @@ -3977,30 +3977,30 @@ int main (int argc, char **argv) { // CHECK19-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK19-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK19: omp.inner.for.cond: -// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK19-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK19-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK19: omp.inner.for.body: -// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK19-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK19-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK19-NEXT: store i32 [[ADD]], 
i32* [[I]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK19-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4 +// CHECK19-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 // CHECK19-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK19-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP10]] -// CHECK19-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 +// CHECK19-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK19-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK19: omp.body.continue: // CHECK19-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK19: omp.inner.for.inc: -// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK19-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK19-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK19-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK19-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK19: omp.inner.for.end: // CHECK19-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK19: omp.loop.exit: @@ -4146,30 +4146,30 @@ int main (int argc, char **argv) { // CHECK20-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK20-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK20: omp.inner.for.cond: -// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK20-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK20-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK20-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK20-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK20: omp.inner.for.body: -// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK20-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK20-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK20-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 // CHECK20-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK20-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4 +// CHECK20-NEXT: [[TMP9:%.*]] = load float, float* [[B]], align 4, !llvm.access.group !5 // CHECK20-NEXT: [[CONV:%.*]] = fptosi float [[TMP9]] to i32 // CHECK20-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4 +// CHECK20-NEXT: [[TMP10:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK20-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP10]] -// CHECK20-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4 +// CHECK20-NEXT: store i32 [[CONV]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK20-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK20: omp.body.continue: // CHECK20-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK20: omp.inner.for.inc: -// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK20-NEXT: [[TMP11:%.*]] = load i32, i32* [[DOTOMP_IV]], align 
4, !llvm.access.group !5 // CHECK20-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP11]], 1 -// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK20-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK20-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK20: omp.inner.for.end: // CHECK20-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK20: omp.loop.exit: @@ -4335,31 +4335,31 @@ int main (int argc, char **argv) { // CHECK21: omp_if.then: // CHECK21-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK21: omp.inner.for.cond: -// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK21-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK21-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK21-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK21: omp.inner.for.body: -// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK21-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK21-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK21-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK21-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK21-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !3 +// CHECK21-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !5, !llvm.access.group !4 // CHECK21-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 // CHECK21-NEXT: [[A:%.*]] = 
getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK21-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 // CHECK21-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK21-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK21-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4 +// CHECK21-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK21-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK21: omp.body.continue: // CHECK21-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK21: omp.inner.for.inc: -// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK21-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK21-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK21-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK21: omp.inner.for.end: // CHECK21-NEXT: br label [[OMP_IF_END:%.*]] // CHECK21: omp_if.else: @@ -4389,7 +4389,7 @@ int main (int argc, char **argv) { // CHECK21-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK21-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK21-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 -// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK21-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK21: omp.inner.for.end17: // CHECK21-NEXT: br label [[OMP_IF_END]] // CHECK21: omp_if.end: @@ -4557,31 +4557,31 @@ int main (int argc, char **argv) { // CHECK22: omp_if.then: // CHECK22-NEXT: br label 
[[OMP_INNER_FOR_COND:%.*]] // CHECK22: omp.inner.for.cond: -// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK22-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK22-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !4 // CHECK22-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK22-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK22: omp.inner.for.body: -// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK22-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK22-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK22-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !4 // CHECK22-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK22-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !3 +// CHECK22-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !5, !llvm.access.group !4 // CHECK22-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 // CHECK22-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK22-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !4 // CHECK22-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK22-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK22-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4 +// CHECK22-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !4 // CHECK22-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK22: omp.body.continue: // CHECK22-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK22: omp.inner.for.inc: -// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK22-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 // CHECK22-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK22-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK22-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !4 +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK22: omp.inner.for.end: // CHECK22-NEXT: br label [[OMP_IF_END:%.*]] // CHECK22: omp_if.else: @@ -4611,7 +4611,7 @@ int main (int argc, char **argv) { // CHECK22-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK22-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK22-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 -// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK22-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK22: omp.inner.for.end17: // CHECK22-NEXT: br label [[OMP_IF_END]] // CHECK22: omp_if.end: @@ -4779,30 +4779,30 @@ int main (int argc, char **argv) { // CHECK23: omp_if.then: // CHECK23-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK23: omp.inner.for.cond: -// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK23-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK23-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK23-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK23: omp.inner.for.body: -// CHECK23-NEXT: [[TMP9:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK23-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK23-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK23-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 // CHECK23-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK23-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !4 +// CHECK23-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !6, !llvm.access.group !5 // CHECK23-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 // CHECK23-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK23-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK23-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK23-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4 +// CHECK23-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK23-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK23: omp.body.continue: // CHECK23-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK23: omp.inner.for.inc: -// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK23-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK23-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK23-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK23: omp.inner.for.end: // 
CHECK23-NEXT: br label [[OMP_IF_END:%.*]] // CHECK23: omp_if.else: @@ -4831,7 +4831,7 @@ int main (int argc, char **argv) { // CHECK23-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK23-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK23-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK23-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK23: omp.inner.for.end16: // CHECK23-NEXT: br label [[OMP_IF_END]] // CHECK23: omp_if.end: @@ -4999,30 +4999,30 @@ int main (int argc, char **argv) { // CHECK24: omp_if.then: // CHECK24-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK24: omp.inner.for.cond: -// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK24-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK24-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK24-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP7]], [[TMP8]] // CHECK24-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK24: omp.inner.for.body: -// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK24-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP9]], 1 // CHECK24-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK24-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !5 // CHECK24-NEXT: [[B:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 1 -// CHECK24-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !4 +// CHECK24-NEXT: [[TMP10:%.*]] = load float, float* [[B]], align 4, !nontemporal !6, !llvm.access.group !5 // 
CHECK24-NEXT: [[CONV2:%.*]] = fptosi float [[TMP10]] to i32 // CHECK24-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4 +// CHECK24-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !5 // CHECK24-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK24-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4 +// CHECK24-NEXT: store i32 [[CONV2]], i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK24-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK24: omp.body.continue: // CHECK24-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK24: omp.inner.for.inc: -// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK24-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK24-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK24-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK24-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK24: omp.inner.for.end: // CHECK24-NEXT: br label [[OMP_IF_END:%.*]] // CHECK24: omp_if.else: @@ -5051,7 +5051,7 @@ int main (int argc, char **argv) { // CHECK24-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK24-NEXT: [[ADD15:%.*]] = add nsw i32 [[TMP18]], 1 // CHECK24-NEXT: store i32 [[ADD15]], i32* [[DOTOMP_IV]], align 4 -// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK24-NEXT: br label [[OMP_INNER_FOR_COND4]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK24: omp.inner.for.end16: // CHECK24-NEXT: br label [[OMP_IF_END]] // CHECK24: omp_if.end: @@ -5872,27 +5872,27 @@ int main (int argc, char **argv) { // CHECK33-NEXT: store i32 [[TMP13]], 
i32* [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK33-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK33-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK33-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK33-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK33-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK33-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 // CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK33-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK33-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK33-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK33-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: @@ -6037,27 +6037,27 @@ int main (int argc, char **argv) { // CHECK33-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK33-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK33: omp.inner.for.cond: -// CHECK33-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK33-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK33-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK33-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK33-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK33: omp.inner.for.body: -// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK33-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK33-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK33-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK33-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK33-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK33-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK33-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK33-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // 
CHECK33: omp.body.continue: // CHECK33-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK33: omp.inner.for.inc: -// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK33-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK33-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK33-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK33-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK33-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK33: omp.inner.for.end: // CHECK33-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK33: omp.loop.exit: @@ -6250,27 +6250,27 @@ int main (int argc, char **argv) { // CHECK34-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK34-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK34: omp.inner.for.cond: -// CHECK34-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK34-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK34-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK34-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK34-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK34-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK34: omp.inner.for.body: -// CHECK34-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK34-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK34-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK34-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK34-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK34-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK34-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !5 +// CHECK34-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !5 // CHECK34-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK34-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK34-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK34-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK34-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK34: omp.body.continue: // CHECK34-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK34: omp.inner.for.inc: -// CHECK34-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK34-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK34-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK34-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK34-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK34-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK34-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK34: omp.inner.for.end: // CHECK34-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK34: omp.loop.exit: @@ -6415,27 +6415,27 @@ int main (int argc, char **argv) { // CHECK34-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK34-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK34: omp.inner.for.cond: -// CHECK34-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK34-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK34-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK34-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 // CHECK34-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK34-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK34: omp.inner.for.body: -// CHECK34-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// 
CHECK34-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK34-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK34-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK34-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK34-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK34-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK34-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 // CHECK34-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK34-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK34-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK34-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 // CHECK34-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK34: omp.body.continue: // CHECK34-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK34: omp.inner.for.inc: -// CHECK34-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK34-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 // CHECK34-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK34-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK34-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK34-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK34-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] // CHECK34: omp.inner.for.end: // CHECK34-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK34: omp.loop.exit: @@ -6626,26 +6626,26 @@ int main (int argc, char **argv) { // CHECK35-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK35-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: 
[[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK35-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK35-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK35-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK35-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK35-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !6 // CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK35-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK35-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK35-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: @@ -6786,26 
+6786,26 @@ int main (int argc, char **argv) { // CHECK35-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK35-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK35: omp.inner.for.cond: -// CHECK35-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK35-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK35-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK35-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK35-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK35: omp.inner.for.body: -// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK35-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK35-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK35-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK35-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK35-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK35-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK35-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK35: omp.body.continue: // CHECK35-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK35: omp.inner.for.inc: -// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK35-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK35-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK35-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 
-// CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK35-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK35-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK35: omp.inner.for.end: // CHECK35-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK35: omp.loop.exit: @@ -6996,26 +6996,26 @@ int main (int argc, char **argv) { // CHECK36-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK36-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK36: omp.inner.for.cond: -// CHECK36-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK36-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK36-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK36-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK36-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK36-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK36: omp.inner.for.body: -// CHECK36-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK36-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK36-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK36-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK36-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK36-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK36-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !6 +// CHECK36-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !6 // CHECK36-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK36-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK36-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK36-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK36: omp.body.continue: // CHECK36-NEXT: 
br label [[OMP_INNER_FOR_INC:%.*]] // CHECK36: omp.inner.for.inc: -// CHECK36-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK36-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK36-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK36-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK36-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK36-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK36-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK36: omp.inner.for.end: // CHECK36-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK36: omp.loop.exit: @@ -7156,26 +7156,26 @@ int main (int argc, char **argv) { // CHECK36-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK36-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK36: omp.inner.for.cond: -// CHECK36-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK36-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK36-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK36-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK36-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK36-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK36: omp.inner.for.body: -// CHECK36-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK36-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK36-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK36-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK36-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK36-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK36-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 +// CHECK36-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, 
!llvm.access.group !12 // CHECK36-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK36-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK36-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK36-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK36: omp.body.continue: // CHECK36-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK36: omp.inner.for.inc: -// CHECK36-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK36-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK36-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK36-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK36-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK36-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK36-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK36: omp.inner.for.end: // CHECK36-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK36: omp.loop.exit: @@ -7405,27 +7405,27 @@ int main (int argc, char **argv) { // CHECK37: omp_if.then: // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK37-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK37-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 
[[TMP17]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK37-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK37-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK37-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 // CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK37-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK37-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK37-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_IF_END:%.*]] // CHECK37: omp_if.else: @@ -7451,7 +7451,7 @@ int main (int argc, char **argv) { // CHECK37-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK37-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK37-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK37: omp.inner.for.end18: // CHECK37-NEXT: br label [[OMP_IF_END]] // CHECK37: omp_if.end: @@ -7598,27 +7598,27 @@ int 
main (int argc, char **argv) { // CHECK37-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK37-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK37: omp.inner.for.cond: -// CHECK37-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK37-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK37-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK37-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK37: omp.inner.for.body: -// CHECK37-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK37-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK37-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK37-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK37-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK37-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK37-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK37-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK37-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK37: omp.body.continue: // CHECK37-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK37: omp.inner.for.inc: -// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK37-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK37-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK37-NEXT: 
store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK37-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK37-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK37: omp.inner.for.end: // CHECK37-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK37: omp.loop.exit: @@ -7848,27 +7848,27 @@ int main (int argc, char **argv) { // CHECK38: omp_if.then: // CHECK38-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK38: omp.inner.for.cond: -// CHECK38-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK38-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK38-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !5 // CHECK38-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK38-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK38: omp.inner.for.body: -// CHECK38-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK38-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK38-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK38-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK38-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK38-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK38-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !5 +// CHECK38-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !5 // CHECK38-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP18]] to i64 // CHECK38-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK38-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK38-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !5 // CHECK38-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK38: omp.body.continue: // CHECK38-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK38: omp.inner.for.inc: -// CHECK38-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK38-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 // CHECK38-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK38-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP4:![0-9]+]] +// CHECK38-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !5 +// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] // CHECK38: omp.inner.for.end: // CHECK38-NEXT: br label [[OMP_IF_END:%.*]] // CHECK38: omp_if.else: @@ -7894,7 +7894,7 @@ int main (int argc, char **argv) { // CHECK38-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK38-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK38-NEXT: store i32 [[ADD17]], i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP9:![0-9]+]] // CHECK38: omp.inner.for.end18: // CHECK38-NEXT: br label [[OMP_IF_END]] // CHECK38: omp_if.end: @@ -8041,27 +8041,27 @@ int main (int argc, char **argv) { // CHECK38-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK38-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK38: omp.inner.for.cond: -// CHECK38-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK38-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK38-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK38-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK38-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // 
CHECK38: omp.inner.for.body: -// CHECK38-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK38-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK38-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK38-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK38-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK38-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK38-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 +// CHECK38-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK38-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK38-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK38-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK38-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK38-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK38: omp.body.continue: // CHECK38-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK38: omp.inner.for.inc: -// CHECK38-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK38-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK38-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK38-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK38-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK38-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK38: omp.inner.for.end: // CHECK38-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK38: omp.loop.exit: @@ -8287,26 +8287,26 @@ int main (int argc, char **argv) { // CHECK39: omp_if.then: // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK39-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK39-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK39-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK39-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4 +// CHECK39-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 // CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP18]] -// CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK39-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK39-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK39-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label 
[[OMP_IF_END:%.*]] // CHECK39: omp_if.else: @@ -8331,7 +8331,7 @@ int main (int argc, char **argv) { // CHECK39-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK39-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK39-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK39: omp.inner.for.end17: // CHECK39-NEXT: br label [[OMP_IF_END]] // CHECK39: omp_if.end: @@ -8474,26 +8474,26 @@ int main (int argc, char **argv) { // CHECK39-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK39-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK39: omp.inner.for.cond: -// CHECK39-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK39-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK39-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK39-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK39: omp.inner.for.body: -// CHECK39-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK39-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK39-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK39-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK39-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK39-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK39-NEXT: store i32 0, 
i32* [[ARRAYIDX]], align 4 +// CHECK39-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK39-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK39: omp.body.continue: // CHECK39-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK39: omp.inner.for.inc: -// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK39-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK39-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK39-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK39-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK39-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] // CHECK39: omp.inner.for.end: // CHECK39-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK39: omp.loop.exit: @@ -8719,26 +8719,26 @@ int main (int argc, char **argv) { // CHECK40: omp_if.then: // CHECK40-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK40: omp.inner.for.cond: -// CHECK40-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK40-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK40-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK40-NEXT: [[CMP6:%.*]] = icmp sle i32 [[TMP15]], [[TMP16]] // CHECK40-NEXT: br i1 [[CMP6]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK40: omp.inner.for.body: -// CHECK40-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK40-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK40-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP17]], 1 // CHECK40-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK40-NEXT: store i32 [[ADD]], i32* [[I4]], align 4 -// CHECK40-NEXT: [[TMP18:%.*]] = load 
i32, i32* [[I4]], align 4 +// CHECK40-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !6 +// CHECK40-NEXT: [[TMP18:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !6 // CHECK40-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP18]] -// CHECK40-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK40-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK40-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK40: omp.body.continue: // CHECK40-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK40: omp.inner.for.inc: -// CHECK40-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK40-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK40-NEXT: [[ADD7:%.*]] = add nsw i32 [[TMP19]], 1 -// CHECK40-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK40-NEXT: store i32 [[ADD7]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK40: omp.inner.for.end: // CHECK40-NEXT: br label [[OMP_IF_END:%.*]] // CHECK40: omp_if.else: @@ -8763,7 +8763,7 @@ int main (int argc, char **argv) { // CHECK40-NEXT: [[TMP24:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 // CHECK40-NEXT: [[ADD16:%.*]] = add nsw i32 [[TMP24]], 1 // CHECK40-NEXT: store i32 [[ADD16]], i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP7:![0-9]+]] +// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND8]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK40: omp.inner.for.end17: // CHECK40-NEXT: br label [[OMP_IF_END]] // CHECK40: omp_if.end: @@ -8906,26 +8906,26 @@ int main (int argc, char **argv) { // CHECK40-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK40-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK40: omp.inner.for.cond: -// CHECK40-NEXT: [[TMP6:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK40-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK40-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 // CHECK40-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK40-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK40: omp.inner.for.body: -// CHECK40-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK40-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK40-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK40-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK40-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK40-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK40-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !14 +// CHECK40-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !14 // CHECK40-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK40-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK40-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 // CHECK40-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK40: omp.body.continue: // CHECK40-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK40: omp.inner.for.inc: -// CHECK40-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK40-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 // CHECK40-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK40-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK40-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK40-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP15:![0-9]+]] // CHECK40: omp.inner.for.end: // CHECK40-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK40: omp.loop.exit: diff --git a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp index 569bf83efae70..fbf312e5e3126 100644 --- a/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp +++ b/clang/test/OpenMP/teams_distribute_simd_dist_schedule_codegen.cpp @@ -269,28 +269,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 
[[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -354,28 +354,28 @@ int main (int argc, char **argv) { // CHECK1-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK1-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK1-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4 
+// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK1-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK1: omp.loop.exit: @@ -446,28 +446,28 @@ int main (int argc, char **argv) { // CHECK1: omp.dispatch.body: // CHECK1-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK1: omp.inner.for.cond: -// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // 
CHECK1-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK1-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK1: omp.inner.for.body: -// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK1-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK1-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK1-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK1: omp.body.continue: // CHECK1-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK1: omp.inner.for.inc: -// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK1-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK1-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK1-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK1-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP16:![0-9]+]] // CHECK1: omp.inner.for.end: // CHECK1-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK1: omp.dispatch.inc: @@ -637,28 +637,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !6 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP5:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !6 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP7:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -722,28 +722,28 @@ int main (int argc, char **argv) { // CHECK2-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK2-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK2-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group 
!12 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !12 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 // CHECK2-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !12 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK2: omp.loop.exit: @@ -814,28 +814,28 @@ int main (int argc, char **argv) { // CHECK2: omp.dispatch.body: // CHECK2-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK2: omp.inner.for.cond: -// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK2-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK2: omp.inner.for.body: -// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP10:%.*]] = load i32, i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK2-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK2-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i64 0, i64 [[IDXPROM]] -// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK2-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK2: omp.body.continue: // CHECK2-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK2: omp.inner.for.inc: -// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK2-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK2-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 -// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] +// CHECK2-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK2-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK2: omp.inner.for.end: // CHECK2-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK2: omp.dispatch.inc: @@ -1005,27 +1005,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// 
CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* 
[[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1089,27 +1089,27 @@ int main (int argc, char **argv) { // CHECK3-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK3-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK3-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // 
CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK3-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK3: omp.loop.exit: @@ -1180,27 +1180,27 @@ int main (int argc, char **argv) { // CHECK3: omp.dispatch.body: // CHECK3-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK3: omp.inner.for.cond: -// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK3-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK3: omp.inner.for.body: -// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK3-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK3-NEXT: [[TMP11:%.*]] 
= load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK3-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK3: omp.body.continue: // CHECK3-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK3: omp.inner.for.inc: -// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK3-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK3-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK3-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK3-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK3: omp.inner.for.end: // CHECK3-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK3: omp.dispatch.inc: @@ -1370,27 +1370,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// 
CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !7 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP6:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !7 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1454,27 +1454,27 @@ int main (int argc, char **argv) { // CHECK4-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK4-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK4-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP9]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 // CHECK4-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK4-NEXT: br label 
[[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK4: omp.loop.exit: @@ -1545,27 +1545,27 @@ int main (int argc, char **argv) { // CHECK4: omp.dispatch.body: // CHECK4-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK4: omp.inner.for.cond: -// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK4-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK4: omp.inner.for.body: -// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK4-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[A:%.*]] = getelementptr inbounds [[STRUCT_SS:%.*]], %struct.SS* [[TMP0]], i32 0, i32 0 -// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [123 x i32], [123 x i32]* [[A]], i32 0, i32 [[TMP11]] -// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK4-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK4: omp.body.continue: // 
CHECK4-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK4: omp.inner.for.inc: -// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 +// CHECK4-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK4-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !11 -// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK4-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK4-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK4: omp.inner.for.end: // CHECK4-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK4: omp.dispatch.inc: @@ -2390,27 +2390,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: 
[[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2513,27 +2513,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK9-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP16:%.*]] = 
load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK9-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2655,27 +2655,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !18 +// CHECK9-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK9-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK9-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK9-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop 
[[LOOP19:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -2833,27 +2833,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, 
i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -2917,27 +2917,27 @@ int main (int argc, char **argv) { // CHECK9-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK9-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK9-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: 
store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK9-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK9: omp.loop.exit: @@ -3008,27 +3008,27 @@ int main (int argc, char **argv) { // CHECK9: omp.dispatch.body: // CHECK9-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK9: omp.inner.for.cond: -// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK9-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK9: omp.inner.for.body: -// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK9-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK9-NEXT: store i32 [[ADD]], 
i32* [[I]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK9-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK9-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK9: omp.body.continue: // CHECK9-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK9: omp.inner.for.inc: -// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK9-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK9-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK9-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK9-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK9: omp.inner.for.end: // CHECK9-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK9: omp.dispatch.inc: @@ -3359,27 +3359,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* 
[[DOTOMP_UB]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !9 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP8:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !9 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP10:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3482,27 +3482,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: 
store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK10-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP17]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !15 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 // CHECK10-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// 
CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP12:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !15 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3624,27 +3624,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK10-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP20]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !13 +// 
CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 +// CHECK10-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 // CHECK10-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !13 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP14:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -3802,27 +3802,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* 
[[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !21 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 // CHECK10-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !21 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP22:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3886,27 +3886,27 @@ int main (int argc, char **argv) { // CHECK10-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK10-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK10-NEXT: 
br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP9]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !24 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 // CHECK10-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !24 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK10: omp.loop.exit: @@ -3977,27 +3977,27 @@ int main (int argc, char **argv) { // CHECK10: omp.dispatch.body: // CHECK10-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK10: omp.inner.for.cond: -// 
CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK10-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK10: omp.inner.for.body: -// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK10-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP11]] to i64 // CHECK10-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i64 0, i64 [[IDXPROM]] -// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !27 // CHECK10-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK10: omp.body.continue: // CHECK10-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK10: omp.inner.for.inc: -// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 +// CHECK10-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 // CHECK10-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK10-NEXT: store i32 
[[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !18 -// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP19:![0-9]+]] +// CHECK10-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !27 +// CHECK10-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]] // CHECK10: omp.inner.for.end: // CHECK10-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK10: omp.dispatch.inc: @@ -4326,26 +4326,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK11-NEXT: br label 
[[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4447,26 +4447,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK11-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, 
!llvm.access.group !16 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK11-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4585,26 +4585,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK11-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, 
!llvm.access.group !14 +// CHECK11-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK11-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK11-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -4762,26 +4762,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: 
[[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK11: omp.inner.for.end: // 
CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4845,26 +4845,26 @@ int main (int argc, char **argv) { // CHECK11-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK11-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK11-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK11-NEXT: [[ADD2:%.*]] = add nsw 
i32 [[TMP10]], 1 -// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK11: omp.loop.exit: @@ -4935,26 +4935,26 @@ int main (int argc, char **argv) { // CHECK11: omp.dispatch.body: // CHECK11-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK11: omp.inner.for.cond: -// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK11-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK11: omp.inner.for.body: -// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 1 // CHECK11-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], 
align 4, !llvm.access.group !19 +// CHECK11-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK11-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK11: omp.body.continue: // CHECK11-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK11: omp.inner.for.inc: -// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK11-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK11-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK11-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK11-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK11: omp.inner.for.end: // CHECK11-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK11: omp.dispatch.inc: @@ -5283,26 +5283,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, 
[[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !10 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP9:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !10 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP11:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5404,26 +5404,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP13]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP14:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: [[TMP15:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[CMP5:%.*]] = icmp sle i32 [[TMP14]], [[TMP15]] // CHECK12-NEXT: br i1 [[CMP5]], label 
[[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP16:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP16]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4 -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I3]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[I3]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP17]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !16 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 // CHECK12-NEXT: [[ADD6:%.*]] = add nsw i32 [[TMP18]], 1 -// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP13:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD6]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !16 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5542,26 +5542,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// 
CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP17:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP18:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[CMP7:%.*]] = icmp sle i32 [[TMP17]], [[TMP18]] // CHECK12-NEXT: br i1 [[CMP7]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP19:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP19]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I4]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP20:%.*]] = load i32, i32* [[I4]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, i32* [[TMP2]], i32 [[TMP20]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 +// CHECK12-NEXT: [[TMP21:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 // CHECK12-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP21]], 1 -// CHECK12-NEXT: store i32 [[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !14 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]] +// CHECK12-NEXT: store i32 
[[ADD8]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: @@ -5719,26 +5719,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !22 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: 
[[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP17:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !22 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP23:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5802,26 +5802,26 @@ int main (int argc, char **argv) { // CHECK12-NEXT: store i32 [[TMP5]], i32* [[DOTOMP_IV]], align 4 // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4 +// CHECK12-NEXT: [[TMP6:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[CMP1:%.*]] = icmp sle i32 [[TMP6]], [[TMP7]] // CHECK12-NEXT: br i1 [[CMP1]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP8]], 1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x 
i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP9]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !25 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 // CHECK12-NEXT: [[ADD2:%.*]] = add nsw i32 [[TMP10]], 1 -// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP18:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD2]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !25 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP26:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_LOOP_EXIT:%.*]] // CHECK12: omp.loop.exit: @@ -5892,26 +5892,26 @@ int main (int argc, char **argv) { // CHECK12: omp.dispatch.body: // CHECK12-NEXT: br label [[OMP_INNER_FOR_COND:%.*]] // CHECK12: omp.inner.for.cond: -// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP8:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[CMP2:%.*]] = icmp sle i32 [[TMP8]], [[TMP9]] // CHECK12-NEXT: br i1 [[CMP2]], label [[OMP_INNER_FOR_BODY:%.*]], label [[OMP_INNER_FOR_END:%.*]] // CHECK12: omp.inner.for.body: -// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP10:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[MUL:%.*]] = mul nsw i32 [[TMP10]], 
1 // CHECK12-NEXT: [[ADD:%.*]] = add nsw i32 0, [[MUL]] -// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 [[ADD]], i32* [[I]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: [[TMP11:%.*]] = load i32, i32* [[I]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds [10 x i32], [10 x i32]* [[TMP0]], i32 0, i32 [[TMP11]] -// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: store i32 0, i32* [[ARRAYIDX]], align 4, !llvm.access.group !28 // CHECK12-NEXT: br label [[OMP_BODY_CONTINUE:%.*]] // CHECK12: omp.body.continue: // CHECK12-NEXT: br label [[OMP_INNER_FOR_INC:%.*]] // CHECK12: omp.inner.for.inc: -// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 +// CHECK12-NEXT: [[TMP12:%.*]] = load i32, i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 // CHECK12-NEXT: [[ADD3:%.*]] = add nsw i32 [[TMP12]], 1 -// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !19 -// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP20:![0-9]+]] +// CHECK12-NEXT: store i32 [[ADD3]], i32* [[DOTOMP_IV]], align 4, !llvm.access.group !28 +// CHECK12-NEXT: br label [[OMP_INNER_FOR_COND]], !llvm.loop [[LOOP29:![0-9]+]] // CHECK12: omp.inner.for.end: // CHECK12-NEXT: br label [[OMP_DISPATCH_INC:%.*]] // CHECK12: omp.dispatch.inc: diff --git a/clang/test/Parser/cxx-template-argument.cpp b/clang/test/Parser/cxx-template-argument.cpp index 0e72cbfbfb175..a42a473409eed 100644 --- a/clang/test/Parser/cxx-template-argument.cpp +++ b/clang/test/Parser/cxx-template-argument.cpp @@ -22,15 +22,18 @@ namespace greatergreater { void f(S=0); // expected-error {{a space is required between a right angle bracket and an equals sign (use '> =')}} void f(S>=S()); // expected-error {{use '> >'}} 
expected-error {{use '> ='}} template void t(); + struct R { + friend void operator==(void (*)(), R) {} + friend void operator>=(void (*)(), R) {} + }; void g() { - void (*p)() = &t; - (void)(&t==p); // expected-error {{use '> ='}} - (void)(&t>=p); // expected-error {{use '> >'}} - (void)(&t>>=p); + (void)(&t==R()); // expected-error {{use '> ='}} + (void)(&t>=R()); // expected-error {{use '> >'}} + (void)(&t>>=R()); #if __cplusplus <= 199711L // expected-error@-2 {{use '> >'}} #endif - (void)(&t>==p); // expected-error {{use '> >'}} expected-error {{use '> ='}} + (void)(&t>==R()); // expected-error {{use '> >'}} expected-error {{use '> ='}} } } diff --git a/clang/test/Preprocessor/Inputs/pragma_sysheader.h b/clang/test/Preprocessor/Inputs/pragma_sysheader.h new file mode 100644 index 0000000000000..7352370e724b4 --- /dev/null +++ b/clang/test/Preprocessor/Inputs/pragma_sysheader.h @@ -0,0 +1,19 @@ +#if defined(CLANG) +#pragma clang system_header +// expected-no-diagnostics +#elif defined(GCC) +#pragma GCC system_header +// expected-no-diagnostics +#elif defined(MS) +#pragma system_header +// expected-no-diagnostics +#else +// expected-warning@+1{{unknown pragma ignored}} +#pragma system_header + +// expected-note@+4{{previous definition is here}} +// expected-warning@+4{{redefinition of typedef 'x' is a C11 feature}} +#endif + +typedef int x; +typedef int x; diff --git a/clang/test/Preprocessor/aix-vec_extabi.c b/clang/test/Preprocessor/aix-vec_extabi.c index b87b992d8d45d..c83c8b64487ee 100644 --- a/clang/test/Preprocessor/aix-vec_extabi.c +++ b/clang/test/Preprocessor/aix-vec_extabi.c @@ -2,11 +2,11 @@ // RUN: | FileCheck %s -check-prefix=EXTABI // RUN: %clang -target powerpc64-ibm-aix-xcoff -mcpu=pwr8 -E -dM -maltivec -mabi=vec-extabi %s -o - 2>&1 \ // RUN: | FileCheck %s -check-prefix=EXTABI -// RUN: not %clang -target powerpc-ibm-aix-xcoff -mcpu=pwr8 -E -dM -maltivec -mabi=vec-default %s 2>&1 \ +// RUN: %clang -target powerpc-ibm-aix-xcoff -mcpu=pwr8 -E -dM 
-maltivec -mabi=vec-default %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=DFLTABI -// RUN: not %clang -target powerpc64-ibm-aix-xcoff -mcpu=pwr8 -E -dM -maltivec -mabi=vec-default %s 2>&1 \ +// RUN: %clang -target powerpc64-ibm-aix-xcoff -mcpu=pwr8 -E -dM -maltivec -mabi=vec-default %s 2>&1 \ // RUN: | FileCheck %s -check-prefix=DFLTABI -// EXTABI: #define __EXTABI__ -// DFLTABI: The default Altivec ABI on AIX is not yet supported, use '-mabi=vec-extabi' for the extended Altivec ABI +// EXTABI: #define __EXTABI__ +// DFLTABI-NOT: #define __EXTABI__ diff --git a/clang/test/Preprocessor/pragma_sysheader.c b/clang/test/Preprocessor/pragma_sysheader.c index 3c94363152af7..421bfb839ee30 100644 --- a/clang/test/Preprocessor/pragma_sysheader.c +++ b/clang/test/Preprocessor/pragma_sysheader.c @@ -1,13 +1,15 @@ -// RUN: %clang_cc1 -verify -pedantic %s -fsyntax-only -// RUN: %clang_cc1 -E %s | FileCheck %s -// expected-no-diagnostics -// rdar://6899937 -#include "pragma_sysheader.h" +// RUN: %clang_cc1 -verify -std=c99 -Wunknown-pragmas -pedantic %s -fsyntax-only +// RUN: %clang_cc1 -verify -std=c99 -Wunknown-pragmas -pedantic %s -fsyntax-only -DGCC +// RUN: %clang_cc1 -verify -std=c99 -Wunknown-pragmas -pedantic %s -fsyntax-only -DCLANG +// RUN: %clang_cc1 -verify -std=c99 -Wunknown-pragmas -pedantic %s -fsyntax-only -fms-extensions -DMS +// rdar://6899937 +#include "Inputs/pragma_sysheader.h" +// RUN: %clang_cc1 -E %s | FileCheck %s // PR9861: Verify that line markers are not messed up in -E mode. 
// CHECK: # 1 "{{.*}}pragma_sysheader.h" 1 -// CHECK-NEXT: # 2 "{{.*}}pragma_sysheader.h" 3 -// CHECK-NEXT: typedef int x; -// CHECK-NEXT: typedef int x; -// CHECK-NEXT: # 6 "{{.*}}pragma_sysheader.c" 2 +// CHECK-NEXT: # 12 "{{.*}}pragma_sysheader.h" +// CHECK: typedef int x; +// CHECK: typedef int x; +// CHECK-NEXT: # 8 "{{.*}}pragma_sysheader.c" 2 diff --git a/clang/test/Preprocessor/pragma_sysheader.h b/clang/test/Preprocessor/pragma_sysheader.h deleted file mode 100644 index b79bde584a98b..0000000000000 --- a/clang/test/Preprocessor/pragma_sysheader.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma GCC system_header -typedef int x; -typedef int x; - diff --git a/clang/test/Preprocessor/sycl-macro-target-specific.cpp b/clang/test/Preprocessor/sycl-macro-target-specific.cpp index 3148096a37214..2f37ec0422296 100644 --- a/clang/test/Preprocessor/sycl-macro-target-specific.cpp +++ b/clang/test/Preprocessor/sycl-macro-target-specific.cpp @@ -23,3 +23,16 @@ // RUN: | FileCheck --check-prefix=CHECK-SYCL-FP-ATOMICS-NEG %s // CHECK-SYCL-FP-ATOMICS: #define SYCL_USE_NATIVE_FP_ATOMICS // CHECK-SYCL-FP-ATOMICS-NEG-NOT: #define SYCL_USE_NATIVE_FP_ATOMICS + +// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_fpga-unknown-unknown-sycldevice -E -dM \ +// RUN: | FileCheck --check-prefix=CHECK-USM-ADDR-SPACE %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64-unknown-unknown-sycldevice -E -dM \ +// RUN: | FileCheck --check-prefix=CHECK-USM-ADDR-SPACE-NEG %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_gen-unknown-unknown-sycldevice -E -dM \ +// RUN: | FileCheck --check-prefix=CHECK-USM-ADDR-SPACE-NEG %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple spir64_x86_64-unknown-unknown-sycldevice -E -dM \ +// RUN: | FileCheck --check-prefix=CHECK-USM-ADDR-SPACE-NEG %s +// RUN: %clang_cc1 %s -fsycl-is-device -triple nvptx64-nvidia-nvcl-sycldevice -E -dM \ +// RUN: | FileCheck --check-prefix=CHECK-USM-ADDR-SPACE-NEG %s +// CHECK-USM-ADDR-SPACE: #define 
__ENABLE_USM_ADDR_SPACE__ +// CHECK-USM-ADDR-SPACE-NEG-NOT: #define __ENABLE_USM_ADDR_SPACE__ diff --git a/clang/test/Sema/aarch64-sve-alias-attribute.c b/clang/test/Sema/aarch64-sve-alias-attribute.c new file mode 100644 index 0000000000000..306d98d27ac97 --- /dev/null +++ b/clang/test/Sema/aarch64-sve-alias-attribute.c @@ -0,0 +1,5 @@ +// RUN: %clang_cc1 -triple aarch64-unknown-linux-gnu -aux-triple aarch64-none-unknown-eabi -target-feature +sve -fopenmp-is-device -fopenmp -verify -fsyntax-only %s + +static __inline__ __attribute__((__clang_arm_builtin_alias(__builtin_sve_svabd_n_f64_m))) // expected-no-diagnostics +void +nop(void); diff --git a/clang/test/Sema/compare.c b/clang/test/Sema/compare.c index 85dcffc502fd4..6f25eb21ff3c9 100644 --- a/clang/test/Sema/compare.c +++ b/clang/test/Sema/compare.c @@ -220,7 +220,7 @@ int pointers(int *a) { int function_pointers(int (*a)(int), int (*b)(int), void (*c)(int)) { return a > b; // expected-warning {{ordered comparison of function pointers}} return function_pointers > function_pointers; // expected-warning {{self-comparison always evaluates to false}} expected-warning{{ordered comparison of function pointers}} - return a > c; // expected-warning {{comparison of distinct pointer types}} + return a > c; // expected-warning {{comparison of distinct pointer types}} expected-warning {{ordered comparison of function pointers}} return a == (void *) 0; return a == (void *) 1; // expected-warning {{equality comparison between function pointer and void pointer}} } diff --git a/clang/test/Sema/no_profile-attribute.c b/clang/test/Sema/no_profile-attribute.c deleted file mode 100644 index b3c073f130ac3..0000000000000 --- a/clang/test/Sema/no_profile-attribute.c +++ /dev/null @@ -1,13 +0,0 @@ -// RUN: %clang_cc1 %s -fsyntax-only -verify -__attribute__((no_profile)) -void no_profile0(void); -#if !__has_attribute(no_profile) -#error "Where did the no_profile function attribute go?" 
-#endif - -void no_profile1(__attribute__((no_profile)) int param); // expected-warning {{'no_profile' attribute only applies to functions}} -__attribute__((no_profile(""))) // expected-error {{'no_profile' attribute takes no arguments}} -void no_profile2(void); -void no_profile3(void) { - __attribute__((no_profile)); // expected-error {{'no_profile' attribute cannot be applied to a statement}} -} diff --git a/clang/test/Sema/no_profile_instrument_function-attribute.c b/clang/test/Sema/no_profile_instrument_function-attribute.c new file mode 100644 index 0000000000000..4ede13c741077 --- /dev/null +++ b/clang/test/Sema/no_profile_instrument_function-attribute.c @@ -0,0 +1,13 @@ +// RUN: %clang_cc1 %s -fsyntax-only -verify +__attribute__((no_profile_instrument_function)) +void no_profile0(void); +#if !__has_attribute(no_profile_instrument_function) +#error "Where did the no_profile_instrument_function function attribute go?" +#endif + +void no_profile1(__attribute__((no_profile_instrument_function)) int param); // expected-warning {{'no_profile_instrument_function' attribute only applies to functions}} +__attribute__((no_profile_instrument_function(""))) // expected-error {{'no_profile_instrument_function' attribute takes no arguments}} +void no_profile2(void); +void no_profile3(void) { + __attribute__((no_profile_instrument_function)); // expected-error {{'no_profile_instrument_function' attribute cannot be applied to a statement}} +} diff --git a/clang/test/Sema/reserved-identifier.cpp b/clang/test/Sema/reserved-identifier.cpp index 1992f6976c38f..1d3e0f9f0043f 100644 --- a/clang/test/Sema/reserved-identifier.cpp +++ b/clang/test/Sema/reserved-identifier.cpp @@ -76,11 +76,19 @@ namespace { int _barbatruc; // no-warning } -long double operator"" _BarbeBleue(long double) // expected-warning {{identifier 'operator""_BarbeBleue' is reserved because it starts with '_' followed by a capital letter}} +long double operator"" _BarbeBleue(long double) // expected-warning 
{{identifier '_BarbeBleue' is reserved because it starts with '_' followed by a capital letter}} { return 0.; } +long double operator""_SacreBleu(long double) // no-warning +{ + return 0.; +} + +long double sacrebleu = operator"" _SacreBleu(1.2); // expected-warning {{identifier '_SacreBleu' is reserved because it starts with '_' followed by a capital letter}} +long double sangbleu = operator""_SacreBleu(1.2); // no-warning + struct _BarbeRouge { // expected-warning {{identifier '_BarbeRouge' is reserved because it starts with '_' followed by a capital letter}} } p; struct _BarbeNoire { // expected-warning {{identifier '_BarbeNoire' is reserved because it starts with '_' followed by a capital letter}} diff --git a/clang/test/SemaCUDA/deferred-oeverload.cu b/clang/test/SemaCUDA/deferred-oeverload.cu index c800ac527ead6..d8343526966c3 100644 --- a/clang/test/SemaCUDA/deferred-oeverload.cu +++ b/clang/test/SemaCUDA/deferred-oeverload.cu @@ -5,6 +5,11 @@ // RUN: %clang_cc1 -fopenmp -fsyntax-only -verify=host,com %s \ // RUN: -std=c++11 -fgpu-defer-diag +// With -fgpu-defer-diag, clang defers overloading resolution induced +// diagnostics when the full candidates set include host device +// functions or wrong-sided candidates. This roughly matches nvcc's +// behavior. + #include "Inputs/cuda.h" // When callee is called by a host function with integer arguments, there is an error for ambiguity. @@ -31,12 +36,20 @@ __host__ void callee4(int); // com-note 2{{candidate function not viable: requir __host__ void callee5(float); // com-note {{candidate function}} __host__ void callee5(double); // com-note {{candidate function}} +// When '<<` operator is called by a device function, there is error for 'invalid operands'. +// It should be deferred since it involves wrong-sided candidates. 
+struct S { + __host__ S &operator <<(int i); // dev-note {{candidate function not viable}} +}; + __host__ void hf() { callee(1); // host-error {{call to 'callee' is ambiguous}} callee2(); callee3(); callee4(); // com-error {{no matching function for call to 'callee4'}} callee5(1); // com-error {{call to 'callee5' is ambiguous}} + S s; + s << 1; undeclared_func(); // com-error {{use of undeclared identifier 'undeclared_func'}} } @@ -45,6 +58,8 @@ __device__ void df() { callee2(); // dev-error {{no matching function for call to 'callee2'}} callee3(); // dev-error {{no matching function for call to 'callee3'}} callee4(); // com-error {{no matching function for call to 'callee4'}} + S s; + s << 1; // dev-error {{invalid operands to binary expression}} } struct A { int x; typedef int isA; }; diff --git a/clang/test/SemaCXX/compare-cxx2a.cpp b/clang/test/SemaCXX/compare-cxx2a.cpp index a45955418c446..fdeb93153715d 100644 --- a/clang/test/SemaCXX/compare-cxx2a.cpp +++ b/clang/test/SemaCXX/compare-cxx2a.cpp @@ -212,8 +212,6 @@ struct Class {}; struct ClassB : Class {}; struct Class2 {}; using FnTy = void(int); -using FnTy2 = long(int); -using FnTy3 = void(int) noexcept; using MemFnTy = void (Class::*)() const; using MemDataTy = long(Class::*); @@ -232,11 +230,6 @@ void test_memptr(MemFnTy mf, MemDataTy md) { (void)(md <=> md); // expected-error {{invalid operands}} expected-warning {{self-comparison}} } -void test_compatible_pointer(FnTy *f1, FnTy2 *f2, FnTy3 *f3) { - (void)(f1 <=> f2); // expected-error {{distinct pointer types}} - (void)(f1 <=> f3); // expected-error {{invalid operands}} -} - // Test that variable narrowing is deferred for value dependent expressions template auto test_template_overflow() { diff --git a/clang/test/SemaCXX/compare-function-pointer.cpp b/clang/test/SemaCXX/compare-function-pointer.cpp new file mode 100644 index 0000000000000..0b185ce08f917 --- /dev/null +++ b/clang/test/SemaCXX/compare-function-pointer.cpp @@ -0,0 +1,27 @@ +// RUN: 
%clang_cc1 -fsyntax-only -std=c++20 -verify %s + +using fp0_t = void (*)(); +using fp1_t = int (*)(); + +extern fp0_t a, b; +extern fp1_t c; + +bool eq0 = a == b; +bool ne0 = a != b; +bool lt0 = a < b; // expected-warning {{ordered comparison of function pointers ('fp0_t' (aka 'void (*)()') and 'fp0_t')}} +bool le0 = a <= b; // expected-warning {{ordered comparison of function pointers}} +bool gt0 = a > b; // expected-warning {{ordered comparison of function pointers}} +bool ge0 = a >= b; // expected-warning {{ordered comparison of function pointers}} +auto tw0 = a <=> b; // expected-error {{ordered comparison of function pointers}} + +bool eq1 = a == c; // expected-error {{comparison of distinct pointer types}} +bool ne1 = a != c; // expected-error {{comparison of distinct pointer types}} +bool lt1 = a < c; // expected-warning {{ordered comparison of function pointers ('fp0_t' (aka 'void (*)()') and 'fp1_t' (aka 'int (*)()'))}} + // expected-error@-1 {{comparison of distinct pointer types}} +bool le1 = a <= c; // expected-warning {{ordered comparison of function pointers}} + // expected-error@-1 {{comparison of distinct pointer types}} +bool gt1 = a > c; // expected-warning {{ordered comparison of function pointers}} + // expected-error@-1 {{comparison of distinct pointer types}} +bool ge1 = a >= c; // expected-warning {{ordered comparison of function pointers}} + // expected-error@-1 {{comparison of distinct pointer types}} +auto tw1 = a <=> c; // expected-error {{ordered comparison of function pointers}} diff --git a/clang/test/SemaCXX/lambdas-implicit-explicit-template.cpp b/clang/test/SemaCXX/lambdas-implicit-explicit-template.cpp new file mode 100644 index 0000000000000..13fe12abe9e9d --- /dev/null +++ b/clang/test/SemaCXX/lambdas-implicit-explicit-template.cpp @@ -0,0 +1,41 @@ +// RUN: %clang_cc1 -std=c++20 -DEXPLICIT -verify %s +// RUN: %clang_cc1 -std=c++17 -DEXPLICIT -verify -Wno-c++20-extensions %s +// RUN: %clang_cc1 -std=c++14 -verify %s + +// 
expected-no-diagnostics + +#ifdef EXPLICIT + +template +void a(F &&f) { + f.template operator()<0>(); +} + +template +void b(F &&f) { + a([=]() { + f.template operator()(); + }); +} + +void c() { + b([&]() { + }); +} + +#endif + +template void a1(F f) { f.operator()(0); } + +template void b1(F f) { + a1([=](auto i) { f.operator()(i); }); +} + +void c1() { + b1([&](auto i) {}); +} + +void c2() { + const auto lambda = [&](auto arg1) {}; + [&](auto arg2) { lambda.operator()(arg2); }(0); +} diff --git a/clang/test/SemaCXX/warn-unused-private-field.cpp b/clang/test/SemaCXX/warn-unused-private-field.cpp index fe44122a1d051..e67603eaceae6 100644 --- a/clang/test/SemaCXX/warn-unused-private-field.cpp +++ b/clang/test/SemaCXX/warn-unused-private-field.cpp @@ -1,5 +1,26 @@ // RUN: %clang_cc1 -fsyntax-only -Wunused-private-field -Wused-but-marked-unused -Wno-uninitialized -verify -std=c++11 %s // RUN: %clang_cc1 -fsyntax-only -Wunused-private-field -Wused-but-marked-unused -Wno-uninitialized -verify -std=c++17 %s +// RUN: %clang_cc1 -fsyntax-only -Wunused-private-field -Wused-but-marked-unused -Wno-uninitialized -verify -std=c++20 %s + +#if __cplusplus >= 202002L + +class EqDefaultCompare { + int used; + +public: + EqDefaultCompare(int x) : used(x) {} + bool operator==(const EqDefaultCompare &) const = default; +}; + +class SpaceShipDefaultCompare { + int used; + +public: + SpaceShipDefaultCompare(int x) : used(x) {} + int operator<=>(const SpaceShipDefaultCompare &) const = default; +}; + +#endif class NotFullyDefined { public: diff --git a/clang/test/SemaOpenCL/unsupported-image.cl b/clang/test/SemaOpenCL/unsupported-image.cl new file mode 100644 index 0000000000000..3aed9c1f13199 --- /dev/null +++ b/clang/test/SemaOpenCL/unsupported-image.cl @@ -0,0 +1,61 @@ +// RUN: %clang_cc1 -triple spir-unknown-unknown -verify -cl-std=CL3.0 -cl-ext=-__opencl_c_images %s +// RUN: %clang_cc1 -triple spir-unknown-unknown -verify -cl-std=CL3.0 -cl-ext=+__opencl_c_images %s + +#ifdef 
__opencl_c_images +//expected-no-diagnostics +#endif + +void test1(image1d_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image1d_t' requires __opencl_c_images support}} +#endif + +void test2(image2d_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_t' requires __opencl_c_images support}} +#endif + +void test3(image1d_array_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image1d_array_t' requires __opencl_c_images support}} +#endif + +void test4(image2d_array_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_array_t' requires __opencl_c_images support}} +#endif + +void test5(image2d_depth_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_depth_t' requires __opencl_c_images support}} +#endif + +void test6(image1d_buffer_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image1d_buffer_t' requires __opencl_c_images support}} +#endif + +void test7(image2d_msaa_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_msaa_t' requires __opencl_c_images support}} +#endif + +void test8(image2d_array_msaa_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_array_msaa_t' requires __opencl_c_images support}} +#endif + +void test9(image2d_msaa_depth_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_msaa_depth_t' requires __opencl_c_images support}} +#endif + +void test10(image2d_array_msaa_depth_t i) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type '__read_only image2d_array_msaa_depth_t' requires __opencl_c_images support}} +#endif + +void test11(sampler_t s) {} +#if !defined(__opencl_c_images) +// expected-error@-2{{use of type 'sampler_t' requires __opencl_c_images 
support}} +#endif diff --git a/clang/test/SemaSYCL/array-kernel-param-neg.cpp b/clang/test/SemaSYCL/array-kernel-param-neg.cpp deleted file mode 100755 index 5b7bdbc1a3e66..0000000000000 --- a/clang/test/SemaSYCL/array-kernel-param-neg.cpp +++ /dev/null @@ -1,49 +0,0 @@ -// RUN: %clang_cc1 -fsycl-is-device -internal-isystem %S/Inputs -fcxx-exceptions -sycl-std=2020 -verify -fsyntax-only %s - -// This test checks if compiler reports compilation error on an attempt to pass -// an array of non-trivially copyable structs as SYCL kernel parameter or -// a non-constant size array. - -#include "sycl.hpp" - -sycl::queue q; - -struct NonTrivialCopyStruct { - int i; - NonTrivialCopyStruct(int _i) : i(_i) {} - NonTrivialCopyStruct(const NonTrivialCopyStruct &x) : i(x.i) {} -}; - -struct NonTrivialDestructorStruct { - int i; - ~NonTrivialDestructorStruct(); -}; - -class Array { - // expected-error@+1 {{kernel parameter is not a constant size array}} - int NonConstantSizeArray[]; - -public: - int operator()() const { return NonConstantSizeArray[0]; } -}; - -void test() { - NonTrivialCopyStruct NTCSObject[4] = {1, 2, 3, 4}; - NonTrivialDestructorStruct NTDSObject[5]; - // expected-note@+1 {{'UnknownSizeArrayObj' declared here}} - Array UnknownSizeArrayObj; - - q.submit([&](sycl::handler &h) { - h.single_task([=] { - // expected-error@+1 {{kernel parameter has non-trivially copy constructible class/struct type}} - int b = NTCSObject[2].i; - // expected-error@+1 {{kernel parameter has non-trivially destructible class/struct type}} - int d = NTDSObject[4].i; - }); - }); - - q.submit([&](sycl::handler &h) { - // expected-error@+1 {{variable 'UnknownSizeArrayObj' with flexible array member cannot be captured in a lambda expression}} - h.single_task(UnknownSizeArrayObj); - }); -} diff --git a/clang/test/SemaSYCL/non-std-layout-param.cpp b/clang/test/SemaSYCL/non-std-layout-param.cpp deleted file mode 100644 index 934cbf44d51ca..0000000000000 --- 
a/clang/test/SemaSYCL/non-std-layout-param.cpp +++ /dev/null @@ -1,53 +0,0 @@ -// RUN: %clang_cc1 -fsycl-is-device -fsycl-std-layout-kernel-params -verify -Wno-sycl-2017-compat -fsyntax-only %s -// RUN: %clang_cc1 -fsycl-is-device -Wno-sycl-2017-compat -fsyntax-only %s - -// This test checks if compiler reports compilation error on an attempt to pass -// non-standard layout struct object as SYCL kernel parameter. - -struct Base { - int X; -}; - -// This struct has non-standard layout, because both C (the most derived class) -// and Base have non-static data members. -struct C : public Base { - int Y; -}; - -template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { - kernelFunc(); -} - -void test() { - C C0; - C0.Y=0; - kernel_single_task([=] { - // expected-error@+1 {{kernel parameter has non-standard layout class/struct type 'C'}} - (void)C0.Y; - }); -} - -struct Kernel { - void operator()() const { - (void) c1; - (void) c2; - (void) p; - (void) q; - } - - int p; - // expected-error@+1 {{kernel parameter has non-standard layout class/struct type 'C'}} - C c1; - - int q; - - // expected-error@+1 {{kernel parameter has non-standard layout class/struct type 'C'}} - C c2; -}; - -void test_struct_field() { - Kernel k{}; - - kernel_single_task(k); -} diff --git a/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp b/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp deleted file mode 100644 index 5705f2df03acc..0000000000000 --- a/clang/test/SemaSYCL/non-trivially-copyable-kernel-param.cpp +++ /dev/null @@ -1,43 +0,0 @@ -// RUN: %clang_cc1 -fsycl-is-device -verify -Wno-sycl-2017-compat -fsyntax-only %s - -// This test checks if compiler reports compilation error on an attempt to pass -// a struct with non-trivially copyable type as SYCL kernel parameter. 
- -struct A { int i; }; - -struct B { - int i; - B (int _i) : i(_i) {} - B (const B& x) : i(x.i) {} -}; - -struct C : A { - const A C2; - C() : A{0}, C2{2}{} -}; - -struct D { - int i; - ~D(); -}; - -template -__attribute__((sycl_kernel)) void kernel_single_task(const Func &kernelFunc) { - kernelFunc(); -} - -void test() { - A IamGood; - IamGood.i = 0; - B IamBad(1); - C IamAlsoGood; - D IamAlsoBad{0}; - kernel_single_task([=] { - int a = IamGood.i; - // expected-error@+1 {{kernel parameter has non-trivially copy constructible class/struct type}} - int b = IamBad.i; - int c = IamAlsoGood.i; - // expected-error@+1 {{kernel parameter has non-trivially destructible class/struct type}} - int d = IamAlsoBad.i; - }); -} diff --git a/clang/test/SemaTemplate/resolve-single-template-id.cpp b/clang/test/SemaTemplate/resolve-single-template-id.cpp index 9562845a9a6e7..8d7bf0847fb1c 100644 --- a/clang/test/SemaTemplate/resolve-single-template-id.cpp +++ b/clang/test/SemaTemplate/resolve-single-template-id.cpp @@ -65,12 +65,14 @@ int main() void (*u)(int) = oneT; b = (void (*)()) twoT; - - one < one; //expected-warning {{self-comparison always evaluates to false}} \ - //expected-warning {{relational comparison result unused}} - oneT < oneT; //expected-warning {{self-comparison always evaluates to false}} \ - //expected-warning {{relational comparison result unused}} + one < one; // expected-warning {{self-comparison always evaluates to false}} \ + // expected-warning {{relational comparison result unused}} \ + // expected-warning {{ordered comparison of function pointers}} + + oneT < oneT; // expected-warning {{self-comparison always evaluates to false}} \ + // expected-warning {{relational comparison result unused}} \ + // expected-warning {{ordered comparison of function pointers}} two < two; //expected-error 2 {{reference to overloaded function could not be resolved; did you mean to call it with no arguments?}} expected-error {{invalid operands to binary expression ('void' 
and 'void')}} twoT < twoT; //expected-error {{reference to overloaded function could not be resolved; did you mean to call it?}} {{cannot resolve overloaded function 'twoT' from context}} diff --git a/clang/test/lit.site.cfg.py.in b/clang/test/lit.site.cfg.py.in index 85526b9d30d6a..f31ede2c39575 100644 --- a/clang/test/lit.site.cfg.py.in +++ b/clang/test/lit.site.cfg.py.in @@ -10,6 +10,7 @@ config.llvm_shlib_dir = path(r"@SHLIBDIR@") config.llvm_plugin_ext = "@LLVM_PLUGIN_EXT@" config.lit_tools_dir = path(r"@LLVM_LIT_TOOLS_DIR@") config.errc_messages = "@LLVM_LIT_ERRC_MESSAGES@" +config.clang_lit_site_cfg = __file__ config.clang_obj_root = path(r"@CLANG_BINARY_DIR@") config.clang_src_dir = path(r"@CLANG_SOURCE_DIR@") config.clang_tools_dir = path(r"@CLANG_TOOLS_DIR@") diff --git a/clang/test/utils/update_cc_test_checks/Inputs/check-globals.c b/clang/test/utils/update_cc_test_checks/Inputs/check-globals.c new file mode 100644 index 0000000000000..a63cec246e468 --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/Inputs/check-globals.c @@ -0,0 +1,10 @@ +// First, make sure --check-globals doesn't crash on a non-FileChecked command. 
+// RUN: true +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s + +void foo() { + static int i, j; +} +void bar() { + static int i, j; +} diff --git a/clang/test/utils/update_cc_test_checks/Inputs/lit.cfg.example b/clang/test/utils/update_cc_test_checks/Inputs/lit.cfg.example new file mode 100644 index 0000000000000..4e221c70b62f7 --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/Inputs/lit.cfg.example @@ -0,0 +1,8 @@ +import lit +lit_config.load_config( + config, os.path.join(lit_config.params.get('clang_lit_site_cfg'))) +config.name = 'update_cc_test_checks.py example' +config.suffixes = ['.c', '.cpp'] +config.test_format = lit.formats.ShTest(execute_external=False) +config.test_source_root = os.path.dirname(__file__) +config.test_exec_root = os.path.dirname(__file__) diff --git a/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c b/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c new file mode 100644 index 0000000000000..8914a21953712 --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c @@ -0,0 +1,9 @@ +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm -o - %s | \ +// RUN: FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm -o - %s | \ +// RUN: FileCheck %s + +void foo(void) { + #pragma omp target + ; +} diff --git a/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c.expected b/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c.expected new file mode 100644 index 0000000000000..ea3cc9480f6d3 --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/Inputs/replace-value-regex-across-runs.c.expected @@ -0,0 +1,15 @@ +// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --replace-value-regex "__omp_offloading_[0-9a-z]+_[0-9a-z]+" +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu 
-fopenmp -emit-llvm -o - %s | \ +// RUN: FileCheck %s +// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -fopenmp -emit-llvm -o - %s | \ +// RUN: FileCheck %s + +// CHECK-LABEL: @foo( +// CHECK-NEXT: entry: +// CHECK-NEXT: call void @{{__omp_offloading_[0-9a-z]+_[0-9a-z]+}}_foo_l7() #[[ATTR2:[0-9]+]] +// CHECK-NEXT: ret void +// +void foo(void) { + #pragma omp target + ; +} diff --git a/clang/test/utils/update_cc_test_checks/check-globals.test b/clang/test/utils/update_cc_test_checks/check-globals.test new file mode 100644 index 0000000000000..def1a8e936729 --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/check-globals.test @@ -0,0 +1,84 @@ +RUN: rm -rf %t && mkdir %t + +# Check --check-globals in normal mode and in --include-generated-funcs mode. + +RUN: cp %S/Inputs/check-globals.c %t/norm.c +RUN: %update_cc_test_checks %t/norm.c --check-globals +RUN: FileCheck %s --input-file=%t/norm.c --match-full-lines -strict-whitespace \ +RUN: -check-prefixes=BOTH,NRM + +RUN: cp %S/Inputs/check-globals.c %t/igf.c +RUN: %update_cc_test_checks %t/igf.c --check-globals --include-generated-funcs +RUN: FileCheck %s --input-file=%t/igf.c --match-full-lines -strict-whitespace \ +RUN: -check-prefixes=BOTH,IGF + +# Check that repeating doesn't change it, such as duplicating '//.' occurrences. + +RUN: cp %t/norm.c %t/norm-again.c +RUN: %update_cc_test_checks %t/norm-again.c --check-globals +RUN: diff -u %t/norm.c %t/norm-again.c +RUN: rm %t/norm-again.c + +RUN: cp %t/igf.c %t/igf-again.c +RUN: %update_cc_test_checks %t/igf-again.c --check-globals \ +RUN: --include-generated-funcs +RUN: diff -u %t/igf.c %t/igf-again.c +RUN: rm %t/igf-again.c + +# Check that the generated directives actually work correctly. For example, +# they're not in the wrong order. + +RUN: cp %S/Inputs/lit.cfg.example %t/lit.cfg +# Show lit failures while avoiding confusing FileCheck input dump nesting. +RUN: %lit %t +# Lit was successful. Sanity-check the results with deterministic test order. 
+RUN: rm %t/.lit_test_times.txt +RUN: %lit %t 2>&1 | FileCheck -check-prefix=LIT-RUN %s + +END. + + BOTH-NOT:{{.}} + NRM:// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals + IGF:// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --check-globals --include-generated-funcs + BOTH-NEXT:// {{.*}} + BOTH-NEXT:// RUN: true + BOTH-NEXT:// RUN: %clang_cc1 -triple x86_64-pc-linux-gnu -emit-llvm -o - %s | FileCheck %s +BOTH-EMPTY: + IGF-NEXT:void foo() { + IGF-NEXT: static int i, j; + IGF-NEXT:} + IGF-NEXT:void bar() { + IGF-NEXT: static int i, j; + IGF-NEXT:} + BOTH-NEXT://. + BOTH-NEXT:// CHECK: @foo.i = internal global i32 0, align 4 + BOTH-NEXT:// CHECK: @foo.j = internal global i32 0, align 4 + BOTH-NEXT:// CHECK: @bar.i = internal global i32 0, align 4 + BOTH-NEXT:// CHECK: @bar.j = internal global i32 0, align 4 + BOTH-NEXT://. + BOTH-NEXT:// CHECK-LABEL: @foo( + BOTH-NEXT:// CHECK-NEXT: entry: + BOTH-NEXT:// CHECK-NEXT: ret void + BOTH-NEXT:// + NRM-NEXT:void foo() { + NRM-NEXT: static int i, j; + NRM-NEXT:} + IGF-NEXT:// + BOTH-NEXT:// CHECK-LABEL: @bar( + BOTH-NEXT:// CHECK-NEXT: entry: + BOTH-NEXT:// CHECK-NEXT: ret void + BOTH-NEXT:// + NRM-NEXT:void bar() { + NRM-NEXT: static int i, j; + NRM-NEXT:} + BOTH-NEXT://. + BOTH-NEXT:// CHECK: attributes {{.*}} + BOTH-NEXT://. + BOTH-NEXT:// CHECK: !0 = {{.*}} + BOTH-NEXT:// CHECK: !1 = {{.*}} + BOTH-NEXT://. 
+ BOTH-NOT:{{.}} + +LIT-RUN: Testing: 2 tests +LIT-RUN: PASS: {{.*}} igf.c +LIT-RUN: PASS: {{.*}} norm.c diff --git a/clang/test/utils/update_cc_test_checks/lit.local.cfg b/clang/test/utils/update_cc_test_checks/lit.local.cfg index 3fef02881853d..cbcc05dff4ca7 100644 --- a/clang/test/utils/update_cc_test_checks/lit.local.cfg +++ b/clang/test/utils/update_cc_test_checks/lit.local.cfg @@ -19,9 +19,13 @@ extra_args += ' --opt ' + shell_quote(opt_path) script_path = os.path.join(config.llvm_src_root, 'utils', 'update_cc_test_checks.py') assert os.path.isfile(script_path) +lit = shell_quote(os.path.join(config.llvm_src_root, 'utils', 'lit', 'lit.py')) +python = shell_quote(config.python_executable) config.substitutions.append( ('%update_cc_test_checks', "%s %s %s" % ( - shell_quote(config.python_executable), shell_quote(script_path), - extra_args))) + python, shell_quote(script_path), extra_args))) config.substitutions.append( ('%clang_tools_dir', shell_quote(config.clang_tools_dir))) +config.substitutions.append( + ('%lit', "%s %s -Dclang_lit_site_cfg=%s -j1 -vv" % ( + python, lit, shell_quote(config.clang_lit_site_cfg)))) diff --git a/clang/test/utils/update_cc_test_checks/replace-value-regex-across-runs.test b/clang/test/utils/update_cc_test_checks/replace-value-regex-across-runs.test new file mode 100644 index 0000000000000..c2fdf6113fc2f --- /dev/null +++ b/clang/test/utils/update_cc_test_checks/replace-value-regex-across-runs.test @@ -0,0 +1,7 @@ +# Test that --replace-value-regex is applied correctly when multiple RUN lines +# use the same FileCheck prefix and have the same output. 
+ +RUN: cp %S/Inputs/replace-value-regex-across-runs.c %t.c +RUN: %update_cc_test_checks %t.c \ +RUN: --replace-value-regex '__omp_offloading_[0-9a-z]+_[0-9a-z]+' +RUN: diff -u %S/Inputs/replace-value-regex-across-runs.c.expected %t.c diff --git a/clang/tools/CMakeLists.txt b/clang/tools/CMakeLists.txt index f542e61106c17..fb8c77c3e144d 100644 --- a/clang/tools/CMakeLists.txt +++ b/clang/tools/CMakeLists.txt @@ -34,6 +34,7 @@ if(CLANG_ENABLE_STATIC_ANALYZER) add_clang_subdirectory(clang-check) add_clang_subdirectory(clang-extdef-mapping) add_clang_subdirectory(scan-build) + add_clang_subdirectory(scan-build-py) add_clang_subdirectory(scan-view) endif() diff --git a/clang/tools/clang-format/ClangFormat.cpp b/clang/tools/clang-format/ClangFormat.cpp index c51d9f4ea1f06..144e87f78c649 100644 --- a/clang/tools/clang-format/ClangFormat.cpp +++ b/clang/tools/clang-format/ClangFormat.cpp @@ -411,6 +411,17 @@ static bool format(StringRef FileName) { unsigned CursorPosition = Cursor; Replacements Replaces = sortIncludes(*FormatStyle, Code->getBuffer(), Ranges, AssumedFileName, &CursorPosition); + + // To format JSON insert a variable to trick the code into thinking its + // JavaScript. 
+ if (FormatStyle->isJson()) { + auto Err = Replaces.add(tooling::Replacement( + tooling::Replacement(AssumedFileName, 0, 0, "x = "))); + if (Err) { + llvm::errs() << "Bad Json variable insertion\n"; + } + } + auto ChangedCode = tooling::applyAllReplacements(Code->getBuffer(), Replaces); if (!ChangedCode) { llvm::errs() << llvm::toString(ChangedCode.takeError()) << "\n"; @@ -506,7 +517,8 @@ int main(int argc, const char **argv) { cl::SetVersionPrinter(PrintVersion); cl::ParseCommandLineOptions( argc, argv, - "A tool to format C/C++/Java/JavaScript/Objective-C/Protobuf/C# code.\n\n" + "A tool to format C/C++/Java/JavaScript/JSON/Objective-C/Protobuf/C# " + "code.\n\n" "If no arguments are specified, it formats the code from standard input\n" "and writes the result to the standard output.\n" "If s are given, it reformats the files. If -i is specified\n" diff --git a/clang/tools/clang-format/clang-format-diff.py b/clang/tools/clang-format/clang-format-diff.py index 4edc0b2e19c29..ea483f59e96b4 100755 --- a/clang/tools/clang-format/clang-format-diff.py +++ b/clang/tools/clang-format/clang-format-diff.py @@ -48,7 +48,7 @@ def main(): '(case sensitive, overrides -iregex)') parser.add_argument('-iregex', metavar='PATTERN', default= r'.*\.(cpp|cc|c\+\+|cxx|c|cl|h|hh|hpp|hxx|m|mm|inc|js|ts' - r'|proto|protodevel|java|cs)', + r'|proto|protodevel|java|cs|json)', help='custom pattern selecting file paths to reformat ' '(case insensitive, overridden by -regex)') parser.add_argument('-sort-includes', action='store_true', default=False, diff --git a/clang/tools/clang-format/git-clang-format b/clang/tools/clang-format/git-clang-format index 3646b4ff41d7f..0233ceb3a868d 100755 --- a/clang/tools/clang-format/git-clang-format +++ b/clang/tools/clang-format/git-clang-format @@ -85,6 +85,7 @@ def main(): 'js', # JavaScript 'ts', # TypeScript 'cs', # C Sharp + 'json', # Json ]) p = argparse.ArgumentParser( diff --git a/clang/tools/clang-refactor/TestSupport.cpp 
b/clang/tools/clang-refactor/TestSupport.cpp index 34499b93c97ea..eb880889749f5 100644 --- a/clang/tools/clang-refactor/TestSupport.cpp +++ b/clang/tools/clang-refactor/TestSupport.cpp @@ -328,8 +328,8 @@ findTestSelectionRanges(StringRef Filename) { // Try to detect mistyped 'range:' comments to ensure tests don't miss // anything. auto DetectMistypedCommand = [&]() -> bool { - if (Comment.contains_lower("range") && Comment.contains("=") && - !Comment.contains_lower("run") && !Comment.contains("CHECK")) { + if (Comment.contains_insensitive("range") && Comment.contains("=") && + !Comment.contains_insensitive("run") && !Comment.contains("CHECK")) { llvm::errs() << "error: suspicious comment '" << Comment << "' that " "resembles the range command found\n"; diff --git a/clang/tools/clang-scan-deps/ClangScanDeps.cpp b/clang/tools/clang-scan-deps/ClangScanDeps.cpp index 49c4757686623..b6533361c5296 100644 --- a/clang/tools/clang-scan-deps/ClangScanDeps.cpp +++ b/clang/tools/clang-scan-deps/ClangScanDeps.cpp @@ -465,7 +465,7 @@ int main(int argc, const char **argv) { auto FlagsEnd = llvm::find(Args, "--"); if (FlagsEnd != Args.begin()) { ClangCLMode = - llvm::sys::path::stem(Args[0]).contains_lower("clang-cl") || + llvm::sys::path::stem(Args[0]).contains_insensitive("clang-cl") || llvm::is_contained(Args, "--driver-mode=cl"); // Reverse scan, starting at the end or at the element before "--". 
diff --git a/clang/tools/driver/cc1as_main.cpp b/clang/tools/driver/cc1as_main.cpp index 9f6a58b634b4b..086ce0ea77875 100644 --- a/clang/tools/driver/cc1as_main.cpp +++ b/clang/tools/driver/cc1as_main.cpp @@ -585,7 +585,7 @@ int cc1as_main(ArrayRef Argv, const char *Argv0, void *MainAddr) { return 1; if (Asm.ShowHelp) { - getDriverOptTable().PrintHelp( + getDriverOptTable().printHelp( llvm::outs(), "clang -cc1as [options] file...", "Clang Integrated Assembler", /*Include=*/driver::options::CC1AsOption, /*Exclude=*/0, diff --git a/clang/tools/driver/driver.cpp b/clang/tools/driver/driver.cpp index ee3ffe3012d1e..9e3f51db21a9d 100644 --- a/clang/tools/driver/driver.cpp +++ b/clang/tools/driver/driver.cpp @@ -273,7 +273,7 @@ static void FixupDiagPrefixExeName(TextDiagnosticPrinter *DiagClient, // If the clang binary happens to be named cl.exe for compatibility reasons, // use clang-cl.exe as the prefix to avoid confusion between clang and MSVC. StringRef ExeBasename(llvm::sys::path::stem(Path)); - if (ExeBasename.equals_lower("cl")) + if (ExeBasename.equals_insensitive("cl")) ExeBasename = "clang-cl"; DiagClient->setPrefix(std::string(ExeBasename)); } diff --git a/clang/tools/libclang/CIndexCodeCompletion.cpp b/clang/tools/libclang/CIndexCodeCompletion.cpp index 6685c892749ea..68f35c41efd74 100644 --- a/clang/tools/libclang/CIndexCodeCompletion.cpp +++ b/clang/tools/libclang/CIndexCodeCompletion.cpp @@ -1026,7 +1026,7 @@ namespace { if (XText.empty() || YText.empty()) return !XText.empty(); - int result = XText.compare_lower(YText); + int result = XText.compare_insensitive(YText); if (result < 0) return true; if (result > 0) diff --git a/clang/tools/scan-build-py/CMakeLists.txt b/clang/tools/scan-build-py/CMakeLists.txt new file mode 100644 index 0000000000000..c9f1cb7d6b2a7 --- /dev/null +++ b/clang/tools/scan-build-py/CMakeLists.txt @@ -0,0 +1,132 @@ +set (BinFiles + "analyze-build" + "intercept-build" + "scan-build") + +set (LibExecs + "analyze-c++" + "analyze-cc" 
+ "intercept-c++" + "intercept-cc") + +set (LibScanbuild + "__init__.py" + "analyze.py" + "arguments.py" + "clang.py" + "compilation.py" + "intercept.py" + "report.py" + "shell.py") + +set (LibScanbuildResources + "scanview.css" + "selectable.js" + "sorttable.js") + +# libear is compiled dynamically in build_libear using the specified cc +# compiler. +set (LibEar + "__init__.py" + "config.h.in" + "ear.c") + +foreach(BinFile ${BinFiles}) + if ("${BinFile}" STREQUAL "scan-build") + # Need to rename scan-build to scan-build-py to prevent overwriting + # scan-build Perl implementation. + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/scan-build-py + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build + ${CMAKE_BINARY_DIR}/bin/scan-build-py + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/scan-build) + install (PROGRAMS "bin/scan-build" + DESTINATION bin + RENAME scan-build-py + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/scan-build-py) + else() + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/bin/${BinFile} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/bin + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile} + ${CMAKE_BINARY_DIR}/bin/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/bin/${BinFile}) + install(PROGRAMS bin/${BinFile} + DESTINATION bin + COMPONENT scan-build-py) + list(APPEND Depends ${CMAKE_BINARY_DIR}/bin/${BinFile}) + endif() +endforeach() + +foreach(lib ${LibExecs}) + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/libexec/${lib} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/libexec + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${lib} + ${CMAKE_BINARY_DIR}/libexec/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/libexec/${lib}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/libexec/${lib}) + install(PROGRAMS libexec/${lib} + DESTINATION libexec + COMPONENT 
scan-build-py) +endforeach() + +foreach(lib ${LibScanbuild}) + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib/libscanbuild + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib} + ${CMAKE_BINARY_DIR}/lib/libscanbuild/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/${lib}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/${lib}) + install(PROGRAMS lib/libscanbuild/${lib} + DESTINATION lib/libscanbuild + COMPONENT scan-build-py) +endforeach() + +foreach(resource ${LibScanbuildResources}) + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib/libscanbuild + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource} + ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libscanbuild/resources/${resource}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libscanbuild/resources/${resource}) + install(PROGRAMS lib/libscanbuild/resources/${resource} + DESTINATION lib/libscanbuild/resources + COMPONENT scan-build-py) +endforeach() + +foreach(lib ${LibEar}) + add_custom_command(OUTPUT ${CMAKE_BINARY_DIR}/lib/libear/${lib} + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib + COMMAND ${CMAKE_COMMAND} -E make_directory + ${CMAKE_BINARY_DIR}/lib/libear + COMMAND ${CMAKE_COMMAND} -E copy + ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib} + ${CMAKE_BINARY_DIR}/lib/libear/ + DEPENDS ${CMAKE_CURRENT_SOURCE_DIR}/lib/libear/${lib}) + list(APPEND Depends ${CMAKE_BINARY_DIR}/lib/libear/${lib}) + install(PROGRAMS 
lib/libear/${lib} + DESTINATION lib/libear + COMPONENT scan-build-py) +endforeach() + +add_custom_target(scan-build-py ALL DEPENDS ${Depends}) +add_llvm_install_targets("install-scan-build-py" + DEPENDS scan-build-py + COMPONENT scan-build-py) diff --git a/clang/tools/scan-build-py/bin/analyze-build b/clang/tools/scan-build-py/bin/analyze-build index 0884ef2234bf4..b3f61429906c4 100755 --- a/clang/tools/scan-build-py/bin/analyze-build +++ b/clang/tools/scan-build-py/bin/analyze-build @@ -8,7 +8,7 @@ import multiprocessing import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.dirname(this_dir)) +sys.path.append(os.path.join(os.path.dirname(this_dir), 'lib')) from libscanbuild.analyze import analyze_build diff --git a/clang/tools/scan-build-py/bin/intercept-build b/clang/tools/scan-build-py/bin/intercept-build index d9757b77b5c73..9ecde39984434 100755 --- a/clang/tools/scan-build-py/bin/intercept-build +++ b/clang/tools/scan-build-py/bin/intercept-build @@ -8,7 +8,7 @@ import multiprocessing import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.dirname(this_dir)) +sys.path.append(os.path.join(os.path.dirname(this_dir), 'lib')) from libscanbuild.intercept import intercept_build diff --git a/clang/tools/scan-build-py/bin/scan-build b/clang/tools/scan-build-py/bin/scan-build index be4e51887e30b..a341751d993a2 100755 --- a/clang/tools/scan-build-py/bin/scan-build +++ b/clang/tools/scan-build-py/bin/scan-build @@ -8,7 +8,7 @@ import multiprocessing import sys import os.path this_dir = os.path.dirname(os.path.realpath(__file__)) -sys.path.append(os.path.dirname(this_dir)) +sys.path.append(os.path.join(os.path.dirname(this_dir), 'lib')) from libscanbuild.analyze import scan_build diff --git a/clang/tools/scan-build-py/libear/__init__.py b/clang/tools/scan-build-py/lib/libear/__init__.py similarity index 100% rename from clang/tools/scan-build-py/libear/__init__.py rename 
to clang/tools/scan-build-py/lib/libear/__init__.py diff --git a/clang/tools/scan-build-py/libear/config.h.in b/clang/tools/scan-build-py/lib/libear/config.h.in similarity index 100% rename from clang/tools/scan-build-py/libear/config.h.in rename to clang/tools/scan-build-py/lib/libear/config.h.in diff --git a/clang/tools/scan-build-py/lib/libear/ear.c b/clang/tools/scan-build-py/lib/libear/ear.c new file mode 100644 index 0000000000000..b06ec7ab00040 --- /dev/null +++ b/clang/tools/scan-build-py/lib/libear/ear.c @@ -0,0 +1,601 @@ +/* -*- coding: utf-8 -*- +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +*/ + +/** + * This file implements a shared library. This library can be pre-loaded by + * the dynamic linker of the Operating System (OS). It implements a few function + * related to process creation. By pre-load this library the executed process + * uses these functions instead of those from the standard library. + * + * The idea here is to inject a logic before call the real methods. The logic is + * to dump the call into a file. To call the real method this library is doing + * the job of the dynamic linker. + * + * The only input for the log writing is about the destination directory. + * This is passed as environment variable. 
+ */ + +// NOLINTNEXTLINE +#include "config.h" + +#include +#include +#include +#include +#include +#include +#include +#include + +#if defined HAVE_POSIX_SPAWN || defined HAVE_POSIX_SPAWNP +#include +#endif + +#if defined HAVE_NSGETENVIRON +#include +#else +extern char **environ; +#endif + +#define ENV_OUTPUT "INTERCEPT_BUILD_TARGET_DIR" +#ifdef APPLE +#define ENV_FLAT "DYLD_FORCE_FLAT_NAMESPACE" +#define ENV_PRELOAD "DYLD_INSERT_LIBRARIES" +#define ENV_SIZE 3 +#else +#define ENV_PRELOAD "LD_PRELOAD" +#define ENV_SIZE 2 +#endif + +#define DLSYM(TYPE_, VAR_, SYMBOL_) \ + union { \ + void *from; \ + TYPE_ to; \ + } cast; \ + if (0 == (cast.from = dlsym(RTLD_NEXT, SYMBOL_))) { \ + perror("bear: dlsym"); \ + exit(EXIT_FAILURE); \ + } \ + TYPE_ const VAR_ = cast.to; + +typedef char const *bear_env_t[ENV_SIZE]; + +static int bear_capture_env_t(bear_env_t *env); +static int bear_reset_env_t(bear_env_t *env); +static void bear_release_env_t(bear_env_t *env); +static char const **bear_update_environment(char *const envp[], + bear_env_t *env); +static char const **bear_update_environ(char const **in, char const *key, + char const *value); +static char **bear_get_environment(); +static void bear_report_call(char const *fun, char const *const argv[]); +static char const **bear_strings_build(char const *arg, va_list *ap); +static char const **bear_strings_copy(char const **const in); +static char const **bear_strings_append(char const **in, char const *e); +static size_t bear_strings_length(char const *const *in); +static void bear_strings_release(char const **); + +static bear_env_t env_names = {ENV_OUTPUT, ENV_PRELOAD +#ifdef ENV_FLAT + , + ENV_FLAT +#endif +}; + +static bear_env_t initial_env = {0, 0 +#ifdef ENV_FLAT + , + 0 +#endif +}; + +static int initialized = 0; +static pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; + +static void on_load(void) __attribute__((constructor)); +static void on_unload(void) __attribute__((destructor)); + +#ifdef HAVE_EXECVE +static int 
call_execve(const char *path, char *const argv[], + char *const envp[]); +#endif +#ifdef HAVE_EXECVP +static int call_execvp(const char *file, char *const argv[]); +#endif +#ifdef HAVE_EXECVPE +static int call_execvpe(const char *file, char *const argv[], + char *const envp[]); +#endif +#ifdef HAVE_EXECVP2 +static int call_execvP(const char *file, const char *search_path, + char *const argv[]); +#endif +#ifdef HAVE_EXECT +static int call_exect(const char *path, char *const argv[], char *const envp[]); +#endif +#ifdef HAVE_POSIX_SPAWN +static int call_posix_spawn(pid_t *restrict pid, const char *restrict path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], + char *const envp[restrict]); +#endif +#ifdef HAVE_POSIX_SPAWNP +static int call_posix_spawnp(pid_t *restrict pid, const char *restrict file, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], + char *const envp[restrict]); +#endif + +/* Initialization method to Captures the relevant environment variables. + */ + +static void on_load(void) { + pthread_mutex_lock(&mutex); + if (!initialized) + initialized = bear_capture_env_t(&initial_env); + pthread_mutex_unlock(&mutex); +} + +static void on_unload(void) { + pthread_mutex_lock(&mutex); + bear_release_env_t(&initial_env); + initialized = 0; + pthread_mutex_unlock(&mutex); +} + +/* These are the methods we are try to hijack. 
+ */ + +#ifdef HAVE_EXECVE +int execve(const char *path, char *const argv[], char *const envp[]) { + bear_report_call(__func__, (char const *const *)argv); + return call_execve(path, argv, envp); +} +#endif + +#ifdef HAVE_EXECV +#ifndef HAVE_EXECVE +#error can not implement execv without execve +#endif +int execv(const char *path, char *const argv[]) { + bear_report_call(__func__, (char const *const *)argv); + char *const *envp = bear_get_environment(); + return call_execve(path, argv, envp); +} +#endif + +#ifdef HAVE_EXECVPE +int execvpe(const char *file, char *const argv[], char *const envp[]) { + bear_report_call(__func__, (char const *const *)argv); + return call_execvpe(file, argv, envp); +} +#endif + +#ifdef HAVE_EXECVP +int execvp(const char *file, char *const argv[]) { + bear_report_call(__func__, (char const *const *)argv); + return call_execvp(file, argv); +} +#endif + +#ifdef HAVE_EXECVP2 +int execvP(const char *file, const char *search_path, char *const argv[]) { + bear_report_call(__func__, (char const *const *)argv); + return call_execvP(file, search_path, argv); +} +#endif + +#ifdef HAVE_EXECT +int exect(const char *path, char *const argv[], char *const envp[]) { + bear_report_call(__func__, (char const *const *)argv); + return call_exect(path, argv, envp); +} +#endif + +#ifdef HAVE_EXECL +#ifndef HAVE_EXECVE +#error can not implement execl without execve +#endif +int execl(const char *path, const char *arg, ...) { + va_list args; + va_start(args, arg); + char const **argv = bear_strings_build(arg, &args); + va_end(args); + + bear_report_call(__func__, (char const *const *)argv); + char *const *envp = bear_get_environment(); + int const result = call_execve(path, (char *const *)argv, envp); + + bear_strings_release(argv); + return result; +} +#endif + +#ifdef HAVE_EXECLP +#ifndef HAVE_EXECVP +#error can not implement execlp without execvp +#endif +int execlp(const char *file, const char *arg, ...) 
{ + va_list args; + va_start(args, arg); + char const **argv = bear_strings_build(arg, &args); + va_end(args); + + bear_report_call(__func__, (char const *const *)argv); + int const result = call_execvp(file, (char *const *)argv); + + bear_strings_release(argv); + return result; +} +#endif + +#ifdef HAVE_EXECLE +#ifndef HAVE_EXECVE +#error can not implement execle without execve +#endif +// int execle(const char *path, const char *arg, ..., char * const envp[]); +int execle(const char *path, const char *arg, ...) { + va_list args; + va_start(args, arg); + char const **argv = bear_strings_build(arg, &args); + char const **envp = va_arg(args, char const **); + va_end(args); + + bear_report_call(__func__, (char const *const *)argv); + int const result = + call_execve(path, (char *const *)argv, (char *const *)envp); + + bear_strings_release(argv); + return result; +} +#endif + +#ifdef HAVE_POSIX_SPAWN +int posix_spawn(pid_t *restrict pid, const char *restrict path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], char *const envp[restrict]) { + bear_report_call(__func__, (char const *const *)argv); + return call_posix_spawn(pid, path, file_actions, attrp, argv, envp); +} +#endif + +#ifdef HAVE_POSIX_SPAWNP +int posix_spawnp(pid_t *restrict pid, const char *restrict file, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], char *const envp[restrict]) { + bear_report_call(__func__, (char const *const *)argv); + return call_posix_spawnp(pid, file, file_actions, attrp, argv, envp); +} +#endif + +/* These are the methods which forward the call to the standard implementation. 
+ */ + +#ifdef HAVE_EXECVE +static int call_execve(const char *path, char *const argv[], + char *const envp[]) { + typedef int (*func)(const char *, char *const *, char *const *); + + DLSYM(func, fp, "execve"); + + char const **const menvp = bear_update_environment(envp, &initial_env); + int const result = (*fp)(path, argv, (char *const *)menvp); + bear_strings_release(menvp); + return result; +} +#endif + +#ifdef HAVE_EXECVPE +static int call_execvpe(const char *file, char *const argv[], + char *const envp[]) { + typedef int (*func)(const char *, char *const *, char *const *); + + DLSYM(func, fp, "execvpe"); + + char const **const menvp = bear_update_environment(envp, &initial_env); + int const result = (*fp)(file, argv, (char *const *)menvp); + bear_strings_release(menvp); + return result; +} +#endif + +#ifdef HAVE_EXECVP +static int call_execvp(const char *file, char *const argv[]) { + typedef int (*func)(const char *file, char *const argv[]); + + DLSYM(func, fp, "execvp"); + + bear_env_t current_env; + bear_capture_env_t(&current_env); + bear_reset_env_t(&initial_env); + int const result = (*fp)(file, argv); + bear_reset_env_t(&current_env); + bear_release_env_t(&current_env); + + return result; +} +#endif + +#ifdef HAVE_EXECVP2 +static int call_execvP(const char *file, const char *search_path, + char *const argv[]) { + typedef int (*func)(const char *, const char *, char *const *); + + DLSYM(func, fp, "execvP"); + + bear_env_t current_env; + bear_capture_env_t(&current_env); + bear_reset_env_t(&initial_env); + int const result = (*fp)(file, search_path, argv); + bear_reset_env_t(&current_env); + bear_release_env_t(&current_env); + + return result; +} +#endif + +#ifdef HAVE_EXECT +static int call_exect(const char *path, char *const argv[], + char *const envp[]) { + typedef int (*func)(const char *, char *const *, char *const *); + + DLSYM(func, fp, "exect"); + + char const **const menvp = bear_update_environment(envp, &initial_env); + int const result = (*fp)(path, argv, (char *const *)menvp); + 
bear_strings_release(menvp); + return result; +} +#endif + +#ifdef HAVE_POSIX_SPAWN +static int call_posix_spawn(pid_t *restrict pid, const char *restrict path, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], + char *const envp[restrict]) { + typedef int (*func)(pid_t *restrict, const char *restrict, + const posix_spawn_file_actions_t *, + const posix_spawnattr_t *restrict, char *const *restrict, + char *const *restrict); + + DLSYM(func, fp, "posix_spawn"); + + char const **const menvp = bear_update_environment(envp, &initial_env); + int const result = + (*fp)(pid, path, file_actions, attrp, argv, (char *const *restrict)menvp); + bear_strings_release(menvp); + return result; +} +#endif + +#ifdef HAVE_POSIX_SPAWNP +static int call_posix_spawnp(pid_t *restrict pid, const char *restrict file, + const posix_spawn_file_actions_t *file_actions, + const posix_spawnattr_t *restrict attrp, + char *const argv[restrict], + char *const envp[restrict]) { + typedef int (*func)(pid_t *restrict, const char *restrict, + const posix_spawn_file_actions_t *, + const posix_spawnattr_t *restrict, char *const *restrict, + char *const *restrict); + + DLSYM(func, fp, "posix_spawnp"); + + char const **const menvp = bear_update_environment(envp, &initial_env); + int const result = + (*fp)(pid, file, file_actions, attrp, argv, (char *const *restrict)menvp); + bear_strings_release(menvp); + return result; +} +#endif + +/* this method is to write log about the process creation. 
*/ + +static void bear_report_call(char const *fun, char const *const argv[]) { + static int const GS = 0x1d; + static int const RS = 0x1e; + static int const US = 0x1f; + + if (!initialized) + return; + + pthread_mutex_lock(&mutex); + const char *cwd = getcwd(NULL, 0); + if (0 == cwd) { + perror("bear: getcwd"); + exit(EXIT_FAILURE); + } + char const *const out_dir = initial_env[0]; + size_t const path_max_length = strlen(out_dir) + 32; + char filename[path_max_length]; + if (-1 == + snprintf(filename, path_max_length, "%s/%d.cmd", out_dir, getpid())) { + perror("bear: snprintf"); + exit(EXIT_FAILURE); + } + FILE *fd = fopen(filename, "a+"); + if (0 == fd) { + perror("bear: fopen"); + exit(EXIT_FAILURE); + } + fprintf(fd, "%d%c", getpid(), RS); + fprintf(fd, "%d%c", getppid(), RS); + fprintf(fd, "%s%c", fun, RS); + fprintf(fd, "%s%c", cwd, RS); + size_t const argc = bear_strings_length(argv); + for (size_t it = 0; it < argc; ++it) { + fprintf(fd, "%s%c", argv[it], US); + } + fprintf(fd, "%c", GS); + if (fclose(fd)) { + perror("bear: fclose"); + exit(EXIT_FAILURE); + } + free((void *)cwd); + pthread_mutex_unlock(&mutex); +} + +/* update the environment to ensure that children processes will copy the desired + * behaviour */ + +static int bear_capture_env_t(bear_env_t *env) { + int status = 1; + for (size_t it = 0; it < ENV_SIZE; ++it) { + char const *const env_value = getenv(env_names[it]); + char const *const env_copy = (env_value) ? strdup(env_value) : env_value; + (*env)[it] = env_copy; + status &= (env_copy) ? 
1 : 0; + } + return status; +} + +static int bear_reset_env_t(bear_env_t *env) { + int status = 1; + for (size_t it = 0; it < ENV_SIZE; ++it) { + if ((*env)[it]) { + setenv(env_names[it], (*env)[it], 1); + } else { + unsetenv(env_names[it]); + } + } + return status; +} + +static void bear_release_env_t(bear_env_t *env) { + for (size_t it = 0; it < ENV_SIZE; ++it) { + free((void *)(*env)[it]); + (*env)[it] = 0; + } +} + +static char const **bear_update_environment(char *const envp[], + bear_env_t *env) { + char const **result = bear_strings_copy((char const **)envp); + for (size_t it = 0; it < ENV_SIZE && (*env)[it]; ++it) + result = bear_update_environ(result, env_names[it], (*env)[it]); + return result; +} + +static char const **bear_update_environ(char const *envs[], char const *key, + char const *const value) { + // find the key if it's there + size_t const key_length = strlen(key); + char const **it = envs; + for (; (it) && (*it); ++it) { + if ((0 == strncmp(*it, key, key_length)) && (strlen(*it) > key_length) && + ('=' == (*it)[key_length])) + break; + } + // allocate an environment entry + size_t const value_length = strlen(value); + size_t const env_length = key_length + value_length + 2; + char *env = malloc(env_length); + if (0 == env) { + perror("bear: malloc [in env_update]"); + exit(EXIT_FAILURE); + } + if (-1 == snprintf(env, env_length, "%s=%s", key, value)) { + perror("bear: snprintf"); + exit(EXIT_FAILURE); + } + // replace or append the environment entry + if (it && *it) { + free((void *)*it); + *it = env; + return envs; + } + return bear_strings_append(envs, env); +} + +static char **bear_get_environment() { +#if defined HAVE_NSGETENVIRON + return *_NSGetEnviron(); +#else + return environ; +#endif +} + +/* util methods to deal with string arrays. environment and process arguments + * are both represented as string arrays. 
*/ + +static char const **bear_strings_build(char const *const arg, va_list *args) { + char const **result = 0; + size_t size = 0; + for (char const *it = arg; it; it = va_arg(*args, char const *)) { + result = realloc(result, (size + 1) * sizeof(char const *)); + if (0 == result) { + perror("bear: realloc"); + exit(EXIT_FAILURE); + } + char const *copy = strdup(it); + if (0 == copy) { + perror("bear: strdup"); + exit(EXIT_FAILURE); + } + result[size++] = copy; + } + result = realloc(result, (size + 1) * sizeof(char const *)); + if (0 == result) { + perror("bear: realloc"); + exit(EXIT_FAILURE); + } + result[size++] = 0; + + return result; +} + +static char const **bear_strings_copy(char const **const in) { + size_t const size = bear_strings_length(in); + + char const **const result = malloc((size + 1) * sizeof(char const *)); + if (0 == result) { + perror("bear: malloc"); + exit(EXIT_FAILURE); + } + + char const **out_it = result; + for (char const *const *in_it = in; (in_it) && (*in_it); ++in_it, ++out_it) { + *out_it = strdup(*in_it); + if (0 == *out_it) { + perror("bear: strdup"); + exit(EXIT_FAILURE); + } + } + *out_it = 0; + return result; +} + +static char const **bear_strings_append(char const **const in, + char const *const e) { + size_t size = bear_strings_length(in); + char const **result = realloc(in, (size + 2) * sizeof(char const *)); + if (0 == result) { + perror("bear: realloc"); + exit(EXIT_FAILURE); + } + result[size++] = e; + result[size++] = 0; + return result; +} + +static size_t bear_strings_length(char const *const *const in) { + size_t result = 0; + for (char const *const *it = in; (it) && (*it); ++it) + ++result; + return result; +} + +static void bear_strings_release(char const **in) { + for (char const *const *it = in; (it) && (*it); ++it) { + free((void *)*it); + } + free((void *)in); +} \ No newline at end of file diff --git a/clang/tools/scan-build-py/libscanbuild/__init__.py b/clang/tools/scan-build-py/lib/libscanbuild/__init__.py 
similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/__init__.py rename to clang/tools/scan-build-py/lib/libscanbuild/__init__.py diff --git a/clang/tools/scan-build-py/libscanbuild/analyze.py b/clang/tools/scan-build-py/lib/libscanbuild/analyze.py similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/analyze.py rename to clang/tools/scan-build-py/lib/libscanbuild/analyze.py diff --git a/clang/tools/scan-build-py/libscanbuild/arguments.py b/clang/tools/scan-build-py/lib/libscanbuild/arguments.py similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/arguments.py rename to clang/tools/scan-build-py/lib/libscanbuild/arguments.py diff --git a/clang/tools/scan-build-py/libscanbuild/clang.py b/clang/tools/scan-build-py/lib/libscanbuild/clang.py similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/clang.py rename to clang/tools/scan-build-py/lib/libscanbuild/clang.py diff --git a/clang/tools/scan-build-py/libscanbuild/compilation.py b/clang/tools/scan-build-py/lib/libscanbuild/compilation.py similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/compilation.py rename to clang/tools/scan-build-py/lib/libscanbuild/compilation.py diff --git a/clang/tools/scan-build-py/libscanbuild/intercept.py b/clang/tools/scan-build-py/lib/libscanbuild/intercept.py similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/intercept.py rename to clang/tools/scan-build-py/lib/libscanbuild/intercept.py diff --git a/clang/tools/scan-build-py/libscanbuild/report.py b/clang/tools/scan-build-py/lib/libscanbuild/report.py similarity index 97% rename from clang/tools/scan-build-py/libscanbuild/report.py rename to clang/tools/scan-build-py/lib/libscanbuild/report.py index 46338b86d26d6..729b25e6350f3 100644 --- a/clang/tools/scan-build-py/libscanbuild/report.py +++ b/clang/tools/scan-build-py/lib/libscanbuild/report.py @@ -379,21 +379,22 @@ def match_and_update_run(message, 
runs_count_offset): def parse_bug_plist(filename): """ Returns the generator of bugs from a single .plist file. """ - content = plistlib.readPlist(filename) - files = content.get('files') - for bug in content.get('diagnostics', []): - if len(files) <= int(bug['location']['file']): - logging.warning('Parsing bug from "%s" failed', filename) - continue - - yield { - 'result': filename, - 'bug_type': bug['type'], - 'bug_category': bug['category'], - 'bug_line': int(bug['location']['line']), - 'bug_path_length': int(bug['location']['col']), - 'bug_file': files[int(bug['location']['file'])] - } + with open(filename, 'rb') as fp: + content = plistlib.load(fp) + files = content.get('files') + for bug in content.get('diagnostics', []): + if len(files) <= int(bug['location']['file']): + logging.warning('Parsing bug from "%s" failed', filename) + continue + + yield { + 'result': filename, + 'bug_type': bug['type'], + 'bug_category': bug['category'], + 'bug_line': int(bug['location']['line']), + 'bug_path_length': int(bug['location']['col']), + 'bug_file': files[int(bug['location']['file'])] + } def parse_bug_html(filename): diff --git a/clang/tools/scan-build-py/libscanbuild/resources/scanview.css b/clang/tools/scan-build-py/lib/libscanbuild/resources/scanview.css similarity index 100% rename from clang/tools/scan-build-py/libscanbuild/resources/scanview.css rename to clang/tools/scan-build-py/lib/libscanbuild/resources/scanview.css diff --git a/clang/tools/scan-build-py/libscanbuild/resources/selectable.js b/clang/tools/scan-build-py/lib/libscanbuild/resources/selectable.js similarity index 60% rename from clang/tools/scan-build-py/libscanbuild/resources/selectable.js rename to clang/tools/scan-build-py/lib/libscanbuild/resources/selectable.js index 53f6a8da13d8c..c88ee78568ef7 100644 --- a/clang/tools/scan-build-py/libscanbuild/resources/selectable.js +++ b/clang/tools/scan-build-py/lib/libscanbuild/resources/selectable.js @@ -1,7 +1,6 @@ -function SetDisplay(RowClass, 
DisplayVal) -{ +function SetDisplay(RowClass, DisplayVal) { var Rows = document.getElementsByTagName("tr"); - for ( var i = 0 ; i < Rows.length; ++i ) { + for (var i = 0; i < Rows.length; ++i) { if (Rows[i].className == RowClass) { Rows[i].style.display = DisplayVal; } @@ -10,24 +9,24 @@ function SetDisplay(RowClass, DisplayVal) function CopyCheckedStateToCheckButtons(SummaryCheckButton) { var Inputs = document.getElementsByTagName("input"); - for ( var i = 0 ; i < Inputs.length; ++i ) { + for (var i = 0; i < Inputs.length; ++i) { if (Inputs[i].type == "checkbox") { - if(Inputs[i] != SummaryCheckButton) { + if (Inputs[i] != SummaryCheckButton) { Inputs[i].checked = SummaryCheckButton.checked; Inputs[i].onclick(); - } + } } } } -function returnObjById( id ) { - if (document.getElementById) - var returnVar = document.getElementById(id); - else if (document.all) - var returnVar = document.all[id]; - else if (document.layers) - var returnVar = document.layers[id]; - return returnVar; +function returnObjById(id) { + if (document.getElementById) + var returnVar = document.getElementById(id); + else if (document.all) + var returnVar = document.all[id]; + else if (document.layers) + var returnVar = document.layers[id]; + return returnVar; } var NumUnchecked = 0; @@ -38,8 +37,7 @@ function ToggleDisplay(CheckButton, ClassName) { if (--NumUnchecked == 0) { returnObjById("AllBugsCheck").checked = true; } - } - else { + } else { SetDisplay(ClassName, "none"); NumUnchecked++; returnObjById("AllBugsCheck").checked = false; diff --git a/clang/tools/scan-build-py/lib/libscanbuild/resources/sorttable.js b/clang/tools/scan-build-py/lib/libscanbuild/resources/sorttable.js new file mode 100644 index 0000000000000..b98f012e34d65 --- /dev/null +++ b/clang/tools/scan-build-py/lib/libscanbuild/resources/sorttable.js @@ -0,0 +1,535 @@ +/* + SortTable + version 2 + 7th April 2007 + Stuart Langridge, http://www.kryogenix.org/code/browser/sorttable/ + + Instructions: + Download this file + 
Add to your HTML + Add class="sortable" to any table you'd like to make sortable + Click on the headers to sort + + Thanks to many, many people for contributions and suggestions. + Licenced as X11: http://www.kryogenix.org/code/browser/licence.html + This basically means: do what you want with it. +*/ + +var stIsIE = /*@cc_on!@*/ false; + +sorttable = { + init : function() { + // quit if this function has already been called + if (arguments.callee.done) + return; + // flag this function so we don't do the same thing twice + arguments.callee.done = true; + // kill the timer + if (_timer) + clearInterval(_timer); + + if (!document.createElement || !document.getElementsByTagName) + return; + + sorttable.DATE_RE = /^(\d\d?)[\/\.-](\d\d?)[\/\.-]((\d\d)?\d\d)$/; + + forEach(document.getElementsByTagName('table'), function(table) { + if (table.className.search(/\bsortable\b/) != -1) { + sorttable.makeSortable(table); + } + }); + }, + + makeSortable : function(table) { + if (table.getElementsByTagName('thead').length == 0) { + // table doesn't have a tHead. Since it should have, create one and + // put the first table row in it. + the = document.createElement('thead'); + the.appendChild(table.rows[0]); + table.insertBefore(the, table.firstChild); + } + // Safari doesn't support table.tHead, sigh + if (table.tHead == null) + table.tHead = table.getElementsByTagName('thead')[0]; + + if (table.tHead.rows.length != 1) + return; // can't cope with two header rows + + // Sorttable v1 put rows with a class of "sortbottom" at the bottom (as + // "total" rows, for example). This is B&R, since what you're supposed + // to do is put them in a tfoot. So, if there are sortbottom rows, + // for backward compatibility, move them to tfoot (creating it if needed). 
+ sortbottomrows = []; + for (var i = 0; i < table.rows.length; i++) { + if (table.rows[i].className.search(/\bsortbottom\b/) != -1) { + sortbottomrows[sortbottomrows.length] = table.rows[i]; + } + } + if (sortbottomrows) { + if (table.tFoot == null) { + // table doesn't have a tfoot. Create one. + tfo = document.createElement('tfoot'); + table.appendChild(tfo); + } + for (var i = 0; i < sortbottomrows.length; i++) { + tfo.appendChild(sortbottomrows[i]); + } + delete sortbottomrows; + } + + // work through each column and calculate its type + headrow = table.tHead.rows[0].cells; + for (var i = 0; i < headrow.length; i++) { + // manually override the type with a sorttable_type attribute + if (!headrow[i].className.match( + /\bsorttable_nosort\b/)) { // skip this col + mtch = headrow[i].className.match(/\bsorttable_([a-z0-9]+)\b/); + if (mtch) { + override = mtch[1]; + } + if (mtch && typeof sorttable["sort_" + override] == 'function') { + headrow[i].sorttable_sortfunction = sorttable["sort_" + override]; + } else { + headrow[i].sorttable_sortfunction = sorttable.guessType(table, i); + } + // make it clickable to sort + headrow[i].sorttable_columnindex = i; + headrow[i].sorttable_tbody = table.tBodies[0]; + dean_addEvent(headrow[i], "click", function(e) { + if (this.className.search(/\bsorttable_sorted\b/) != -1) { + // if we're already sorted by this column, just + // reverse the table, which is quicker + sorttable.reverse(this.sorttable_tbody); + this.className = this.className.replace('sorttable_sorted', + 'sorttable_sorted_reverse'); + this.removeChild(document.getElementById('sorttable_sortfwdind')); + sortrevind = document.createElement('span'); + sortrevind.id = "sorttable_sortrevind"; + sortrevind.innerHTML = stIsIE + ? 
' 5' + : ' ▴'; + this.appendChild(sortrevind); + return; + } + if (this.className.search(/\bsorttable_sorted_reverse\b/) != -1) { + // if we're already sorted by this column in reverse, just + // re-reverse the table, which is quicker + sorttable.reverse(this.sorttable_tbody); + this.className = this.className.replace('sorttable_sorted_reverse', + 'sorttable_sorted'); + this.removeChild(document.getElementById('sorttable_sortrevind')); + sortfwdind = document.createElement('span'); + sortfwdind.id = "sorttable_sortfwdind"; + sortfwdind.innerHTML = stIsIE + ? ' 6' + : ' ▾'; + this.appendChild(sortfwdind); + return; + } + + // remove sorttable_sorted classes + theadrow = this.parentNode; + forEach(theadrow.childNodes, function(cell) { + if (cell.nodeType == 1) { // an element + cell.className = + cell.className.replace('sorttable_sorted_reverse', ''); + cell.className = cell.className.replace('sorttable_sorted', ''); + } + }); + sortfwdind = document.getElementById('sorttable_sortfwdind'); + if (sortfwdind) { + sortfwdind.parentNode.removeChild(sortfwdind); + } + sortrevind = document.getElementById('sorttable_sortrevind'); + if (sortrevind) { + sortrevind.parentNode.removeChild(sortrevind); + } + + this.className += ' sorttable_sorted'; + sortfwdind = document.createElement('span'); + sortfwdind.id = "sorttable_sortfwdind"; + sortfwdind.innerHTML = + stIsIE ? ' 6' : ' ▾'; + this.appendChild(sortfwdind); + + // build an array to sort. 
This is a Schwartzian transform thing, + // i.e., we "decorate" each row with the actual sort key, + // sort based on the sort keys, and then put the rows back in order + // which is a lot faster because you only do getInnerText once per row + row_array = []; + col = this.sorttable_columnindex; + rows = this.sorttable_tbody.rows; + for (var j = 0; j < rows.length; j++) { + row_array[row_array.length] = + [ sorttable.getInnerText(rows[j].cells[col]), rows[j] ]; + } + /* If you want a stable sort, uncomment the following line */ + sorttable.shaker_sort(row_array, this.sorttable_sortfunction); + /* and comment out this one */ + // row_array.sort(this.sorttable_sortfunction); + + tb = this.sorttable_tbody; + for (var j = 0; j < row_array.length; j++) { + tb.appendChild(row_array[j][1]); + } + + delete row_array; + }); + } + } + }, + + guessType : function(table, column) { + // guess the type of a column based on its first non-blank row + sortfn = sorttable.sort_alpha; + for (var i = 0; i < table.tBodies[0].rows.length; i++) { + text = sorttable.getInnerText(table.tBodies[0].rows[i].cells[column]); + if (text != '') { + if (text.match(/^-?[」$、]?[\d,.]+%?$/)) { + return sorttable.sort_numeric; + } + // check for a date: dd/mm/yyyy or dd/mm/yy + // can have / or . or - as separator + // can be mm/dd as well + possdate = text.match(sorttable.DATE_RE) + if (possdate) { + // looks like a date + first = parseInt(possdate[1]); + second = parseInt(possdate[2]); + if (first > 12) { + // definitely dd/mm + return sorttable.sort_ddmm; + } else if (second > 12) { + return sorttable.sort_mmdd; + } else { + // looks like a date, but we can't tell which, so assume + // that it's dd/mm (English imperialism!) and keep looking + sortfn = sorttable.sort_ddmm; + } + } + } + } + return sortfn; + }, + + getInnerText : function(node) { + // gets the text we want to use for sorting for a cell. + // strips leading and trailing whitespace. 
+ // this is *not* a generic getInnerText function; it's special to sorttable. + // for example, you can override the cell text with a customkey attribute. + // it also gets .value for fields. + + hasInputs = (typeof node.getElementsByTagName == 'function') && + node.getElementsByTagName('input').length; + + if (node.getAttribute("sorttable_customkey") != null) { + return node.getAttribute("sorttable_customkey"); + } else if (typeof node.textContent != 'undefined' && !hasInputs) { + return node.textContent.replace(/^\s+|\s+$/g, ''); + } else if (typeof node.innerText != 'undefined' && !hasInputs) { + return node.innerText.replace(/^\s+|\s+$/g, ''); + } else if (typeof node.text != 'undefined' && !hasInputs) { + return node.text.replace(/^\s+|\s+$/g, ''); + } else { + switch (node.nodeType) { + case 3: + if (node.nodeName.toLowerCase() == 'input') { + return node.value.replace(/^\s+|\s+$/g, ''); + } + case 4: + return node.nodeValue.replace(/^\s+|\s+$/g, ''); + break; + case 1: + case 11: + var innerText = ''; + for (var i = 0; i < node.childNodes.length; i++) { + innerText += sorttable.getInnerText(node.childNodes[i]); + } + return innerText.replace(/^\s+|\s+$/g, ''); + break; + default: + return ''; + } + } + }, + + reverse : function(tbody) { + // reverse the rows in a tbody + newrows = []; + for (var i = 0; i < tbody.rows.length; i++) { + newrows[newrows.length] = tbody.rows[i]; + } + for (var i = newrows.length - 1; i >= 0; i--) { + tbody.appendChild(newrows[i]); + } + delete newrows; + }, + + /* sort functions + each sort function takes two parameters, a and b + you are comparing a[0] and b[0] */ + sort_numeric : function(a, b) { + aa = parseFloat(a[0].replace(/[^0-9.-]/g, '')); + if (isNaN(aa)) + aa = 0; + bb = parseFloat(b[0].replace(/[^0-9.-]/g, '')); + if (isNaN(bb)) + bb = 0; + return aa - bb; + }, + sort_alpha : function(a, b) { + if (a[0] == b[0]) + return 0; + if (a[0] < b[0]) + return -1; + return 1; + }, + sort_ddmm : function(a, b) { + mtch = 
a[0].match(sorttable.DATE_RE); + y = mtch[3]; + m = mtch[2]; + d = mtch[1]; + if (m.length == 1) + m = '0' + m; + if (d.length == 1) + d = '0' + d; + dt1 = y + m + d; + mtch = b[0].match(sorttable.DATE_RE); + y = mtch[3]; + m = mtch[2]; + d = mtch[1]; + if (m.length == 1) + m = '0' + m; + if (d.length == 1) + d = '0' + d; + dt2 = y + m + d; + if (dt1 == dt2) + return 0; + if (dt1 < dt2) + return -1; + return 1; + }, + sort_mmdd : function(a, b) { + mtch = a[0].match(sorttable.DATE_RE); + y = mtch[3]; + d = mtch[2]; + m = mtch[1]; + if (m.length == 1) + m = '0' + m; + if (d.length == 1) + d = '0' + d; + dt1 = y + m + d; + mtch = b[0].match(sorttable.DATE_RE); + y = mtch[3]; + d = mtch[2]; + m = mtch[1]; + if (m.length == 1) + m = '0' + m; + if (d.length == 1) + d = '0' + d; + dt2 = y + m + d; + if (dt1 == dt2) + return 0; + if (dt1 < dt2) + return -1; + return 1; + }, + + shaker_sort : function(list, comp_func) { + // A stable sort function to allow multi-level sorting of data + // see: http://en.wikipedia.org/wiki/Cocktail_sort + // thanks to Joseph Nahmias + var b = 0; + var t = list.length - 1; + var swap = true; + + while (swap) { + swap = false; + for (var i = b; i < t; ++i) { + if (comp_func(list[i], list[i + 1]) > 0) { + var q = list[i]; + list[i] = list[i + 1]; + list[i + 1] = q; + swap = true; + } + } // for + t--; + + if (!swap) + break; + + for (var i = t; i > b; --i) { + if (comp_func(list[i], list[i - 1]) < 0) { + var q = list[i]; + list[i] = list[i - 1]; + list[i - 1] = q; + swap = true; + } + } // for + b++; + + } // while(swap) + } +} + +/* ****************************************************************** + Supporting functions: bundled here to avoid depending on a library + ****************************************************************** */ + +// Dean Edwards/Matthias Miller/John Resig + +/* for Mozilla/Opera9 */ +if (document.addEventListener) { + document.addEventListener("DOMContentLoaded", sorttable.init, false); +} + +/* for Internet 
Explorer */ +/*@cc_on @*/ +/*@if (@_win32) + document.write("